# Source: Dongjin1203 — "Test GGUF with lightweight build" (commit 03bbae3)
import os
from dotenv import load_dotenv
class Config:
    """Unified configuration for the RAG system.

    Collects every setting used by the pipeline — API keys, file paths,
    preprocessing/chunking parameters, embedding, vector-DB, search, LLM,
    and local GGUF model options — and exposes small ``validate_*`` helpers
    that check each stage's prerequisites.

    Environment variables (loaded from a project-root ``.env`` via
    ``load_dotenv``) override the documented defaults where noted.
    """

    def __init__(self):
        # Load .env from the project root (silently a no-op if the file is absent).
        load_dotenv()

        # ===== API keys =====
        # Raises ValueError immediately if OPENAI_API_KEY is not set.
        self.OPENAI_API_KEY = self._get_api_key()

        # ===== Paths =====
        # Preprocessing inputs/outputs.
        self.META_CSV_PATH = "./data/data_list.csv"
        self.BASE_FOLDER_PATH = "./data/files/"
        self.OUTPUT_CHUNKS_PATH = "./data/rag_chunks_final.csv"
        # RAG input — env var takes precedence, default otherwise.
        # NOTE: the original hard-coded the default despite its comment
        # claiming env-first; os.getenv with the same default implements the
        # stated intent and is backward compatible when the var is unset.
        self.RAG_INPUT_PATH = os.getenv("RAG_INPUT_PATH", "./data/rag_chunks_final.csv")
        self.DB_DIRECTORY = os.getenv("CHROMA_DB_PATH", "./chroma_db")

        # ===== Preprocessing / chunking =====
        self.CHUNK_SIZE = 1000
        self.CHUNK_OVERLAP = 200
        self.SEPARATORS = ["\n\n", "\n", " ", ""]
        self.MIN_TEXT_LENGTH = 100

        # ===== Embedding =====
        self.EMBEDDING_MODEL_NAME = "text-embedding-3-small"
        self.BATCH_SIZE = 50
        self.MAX_TOKENS_PER_BATCH = 250000
        # Chunk-validation bounds (characters).
        self.MIN_CHUNK_LENGTH = 10
        self.MAX_CHUNK_LENGTH = 10000

        # ===== Vector DB =====
        self.COLLECTION_NAME = "rag_documents"

        # ===== Search =====
        self.DEFAULT_TOP_K = 10
        self.DEFAULT_ALPHA = 0.5
        self.DEFAULT_SEARCH_MODE = "hybrid_rerank"

        # ===== LLM =====
        self.LLM_MODEL_NAME = "gpt-4o-mini"
        self.DEFAULT_TEMPERATURE = 0.0
        self.DEFAULT_MAX_TOKENS = 1000
        # System prompt (Korean: "You are an RFP analysis and summarization expert.")
        self.SYSTEM_PROMPT = "당신은 RFP(제안요청서) 분석 및 요약 전문가입니다."

        # ===== GGUF local model =====
        # Whether to pull the model from the Hugging Face Model Hub (env-first).
        self.USE_MODEL_HUB = os.getenv("USE_MODEL_HUB", "true").lower() == "true"
        # Hugging Face Model Hub coordinates for the Korean
        # Llama-3-Open-Ko-8B GGUF model.
        self.MODEL_HUB_REPO = os.getenv(
            "MODEL_HUB_REPO",
            "Dongjin1203/RFP_Documents_chatbot"
        )
        self.MODEL_HUB_FILENAME = os.getenv(
            "MODEL_HUB_FILENAME",
            "Llama-3-Open-Ko-8B.Q4_K_M.gguf"
        )
        self.MODEL_CACHE_DIR = os.getenv("MODEL_CACHE_DIR", ".cache/models")
        # Local file path (used when USE_MODEL_HUB=false).
        self.GGUF_MODEL_PATH = os.getenv("GGUF_MODEL_PATH", ".cache/models/Llama-3-Open-Ko-8B.Q4_K_M.gguf")
        # GGUF GPU settings (tuned for T4 Medium with an 8B model).
        self.GGUF_N_GPU_LAYERS = int(os.getenv("GGUF_N_GPU_LAYERS", "35"))  # 35 layers ≈ whole 8B model on a T4
        self.GGUF_N_CTX = int(os.getenv("GGUF_N_CTX", "2048"))  # context window length
        self.GGUF_N_THREADS = int(os.getenv("GGUF_N_THREADS", "4"))  # CPU threads (keep low when GPU is used)
        self.GGUF_MAX_NEW_TOKENS = int(os.getenv("GGUF_MAX_NEW_TOKENS", "512"))  # max generated tokens
        self.GGUF_TEMPERATURE = float(os.getenv("GGUF_TEMPERATURE", "0.7"))  # sampling temperature
        self.GGUF_TOP_P = float(os.getenv("GGUF_TOP_P", "0.9"))  # nucleus sampling

    def _get_api_key(self) -> str:
        """Return OPENAI_API_KEY from the environment.

        Raises:
            ValueError: if the variable is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError(
                "OPENAI_API_KEY가 설정되지 않았습니다.\n"
                "프로젝트 루트에 .env 파일을 만들고 OPENAI_API_KEY=your-key 를 추가하세요."
            )
        return api_key

    def validate_preprocess(self) -> bool:
        """Validate preprocessing prerequisites.

        Checks the meta CSV and the source-file folder exist, and creates the
        output directory if needed.

        Raises:
            FileNotFoundError: if a required input path is missing.
        """
        if not os.path.exists(self.META_CSV_PATH):
            raise FileNotFoundError(
                f"메타 CSV 파일을 찾을 수 없습니다: {self.META_CSV_PATH}"
            )
        if not os.path.exists(self.BASE_FOLDER_PATH):
            raise FileNotFoundError(
                f"파일 폴더를 찾을 수 없습니다: {self.BASE_FOLDER_PATH}"
            )
        output_dir = os.path.dirname(self.OUTPUT_CHUNKS_PATH)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        return True

    def validate_rag(self) -> bool:
        """Validate RAG prerequisites (the OpenAI API key is present)."""
        if not self.OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY가 설정되지 않았습니다")
        return True

    def validate_gguf(self) -> bool:
        """Validate GGUF settings; warns (does not raise) on soft problems."""
        if not self.USE_MODEL_HUB:
            # Using a local file — make sure it actually exists.
            if not os.path.exists(self.GGUF_MODEL_PATH):
                print(f"⚠️ 경고: GGUF 모델 파일이 없습니다: {self.GGUF_MODEL_PATH}")
                print(" USE_MODEL_HUB=true로 설정하여 자동 다운로드하거나 모델 파일을 준비하세요.")
        # Report whether GPU offloading is enabled.
        if self.GGUF_N_GPU_LAYERS > 0:
            print(f"✅ GPU 가속 활성화: {self.GGUF_N_GPU_LAYERS}개 레이어")
        else:
            print("⚠️ CPU 전용 모드 (n_gpu_layers=0)")
        return True

    def validate_all(self) -> bool:
        """Run every stage's validation (preprocess, RAG, GGUF)."""
        self.validate_preprocess()
        self.validate_rag()
        self.validate_gguf()
        return True

    def validate(self) -> bool:
        """Backward-compatible alias for validate_preprocess()."""
        return self.validate_preprocess()

    def print_gguf_config(self) -> None:
        """Print the GGUF model settings (debugging aid)."""
        print("\n" + "=" * 50)
        print("GGUF 모델 설정")
        print("=" * 50)
        print(f"Model Hub 사용: {self.USE_MODEL_HUB}")
        if self.USE_MODEL_HUB:
            print(f"Hub Repo: {self.MODEL_HUB_REPO}")
            print(f"Hub 파일명: {self.MODEL_HUB_FILENAME}")
            print(f"캐시 디렉토리: {self.MODEL_CACHE_DIR}")
        else:
            print(f"로컬 경로: {self.GGUF_MODEL_PATH}")
        print("\nGPU 설정:")
        print(f" - GPU 레이어: {self.GGUF_N_GPU_LAYERS}")
        print(f" - 컨텍스트: {self.GGUF_N_CTX}")
        print(f" - 스레드: {self.GGUF_N_THREADS}")
        print("\n생성 설정:")
        print(f" - Max Tokens: {self.GGUF_MAX_NEW_TOKENS}")
        print(f" - Temperature: {self.GGUF_TEMPERATURE}")
        print(f" - Top-P: {self.GGUF_TOP_P}")
        print("=" * 50 + "\n")
# ํ•˜์œ„ ํ˜ธํ™˜์„ฑ์„ ์œ„ํ•œ ๋ณ„์นญ
PreprocessConfig = Config
RAGConfig = Config