Commit ·
d9d7415
1
Parent(s): fca8e5d
Test GGUF with lightweight build
Browse files- src/generator/generator_gguf.py +52 -39
src/generator/generator_gguf.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from llama_cpp import Llama
|
| 2 |
from typing import Optional, Dict, Any, List
|
| 3 |
import logging
|
| 4 |
import time
|
|
@@ -275,57 +275,70 @@ class GGUFRAGPipeline:
|
|
| 275 |
|
| 276 |
def __init__(
|
| 277 |
self,
|
| 278 |
-
config
|
| 279 |
model: str = None, # ํธํ์ฑ์ฉ (์ฌ์ฉ ์ ํจ)
|
| 280 |
-
top_k: int =
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
|
|
|
| 289 |
):
|
| 290 |
"""
|
| 291 |
์ด๊ธฐํ
|
| 292 |
|
| 293 |
Args:
|
| 294 |
-
config: RAGConfig
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
| 301 |
search_mode: ๊ฒ์ ๋ชจ๋
|
| 302 |
-
top_k: ๊ฒ์ํ ๋ฌธ์ ์
|
| 303 |
alpha: ์๋ฒ ๋ฉ ๊ฐ์ค์น
|
| 304 |
"""
|
| 305 |
self.config = config or RAGConfig()
|
| 306 |
-
self.
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
-
# Generator ์ด๊ธฐํ
|
| 320 |
self.generator = GGUFGenerator(
|
| 321 |
model_path=self.config.GGUF_MODEL_PATH,
|
| 322 |
-
n_gpu_layers=
|
| 323 |
-
n_ctx=
|
| 324 |
-
n_threads=
|
| 325 |
config=self.config,
|
| 326 |
-
max_new_tokens=
|
| 327 |
-
temperature=
|
| 328 |
-
top_p=
|
| 329 |
system_prompt=self.config.SYSTEM_PROMPT
|
| 330 |
)
|
| 331 |
|
|
@@ -345,7 +358,7 @@ class GGUFRAGPipeline:
|
|
| 345 |
|
| 346 |
def _retrieve_and_format(self, query: str) -> str:
|
| 347 |
"""๊ฒ์ ์ํ ๋ฐ ์ปจํ
์คํธ ํฌ๋งทํ
"""
|
| 348 |
-
# ๊ฒ์ ๋ชจ๋์ ๋ฐ๋ผ ๋ฌธ์ ๊ฒ์
|
| 349 |
if self.search_mode == "embedding":
|
| 350 |
docs = self.retriever.search(query, top_k=self.top_k)
|
| 351 |
elif self.search_mode == "embedding_rerank":
|
|
|
|
| 1 |
+
from llama_cpp import Llama
|
| 2 |
from typing import Optional, Dict, Any, List
|
| 3 |
import logging
|
| 4 |
import time
|
|
|
|
| 275 |
|
| 276 |
def __init__(
    self,
    config=None,
    model: str = None,  # compatibility only — accepted but never used
    top_k: int = None,
    # GPU settings (optional; each one overrides the matching config value)
    n_gpu_layers: int = None,
    n_ctx: int = None,
    n_threads: int = None,
    max_new_tokens: int = None,
    temperature: float = None,
    top_p: float = None,
    search_mode: str = None,
    alpha: float = None
):
    """Initialize the GGUF RAG pipeline: retriever plus GGUF generator.

    Every keyword argument except ``config`` and ``model`` is an optional
    override: when left as ``None`` the corresponding value is read from
    ``self.config``.

    Args:
        config: RAGConfig object; a fresh ``RAGConfig()`` is built when None.
        model: model name kept for backward compatibility (ignored).
        top_k: default number of documents to retrieve (config override).
        n_gpu_layers: number of layers offloaded to GPU (config override).
        n_ctx: context window length (config override).
        n_threads: CPU thread count (config override).
        max_new_tokens: generation token budget (config override).
        temperature: sampling temperature (config override).
        top_p: nucleus-sampling threshold (config override).
        search_mode: retrieval mode, e.g. "embedding" (config override).
        alpha: embedding weight for hybrid search (config override).
    """
    self.config = config or RAGConfig()
    # Use `is not None` for ALL overrides: the original mixed truthiness
    # (`top_k or ...`, `search_mode or ...`) with `is not None` (`alpha`,
    # gguf_* below), so an explicit falsy value (0, "") was silently
    # replaced by the config default. Normalized to the explicit check.
    self.top_k = top_k if top_k is not None else self.config.DEFAULT_TOP_K

    # Retrieval settings
    self.search_mode = search_mode if search_mode is not None else self.config.DEFAULT_SEARCH_MODE
    self.alpha = alpha if alpha is not None else self.config.DEFAULT_ALPHA

    # Retriever initialization (delegates all search to RAGRetriever)
    logger.info("RAGRetriever ์ด๊ธฐํ ์ค...")
    from src.retriever.retriever import RAGRetriever
    self.retriever = RAGRetriever(config=self.config)

    # GGUF settings: explicit constructor arguments win over config values
    gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else self.config.GGUF_N_GPU_LAYERS
    gguf_n_ctx = n_ctx if n_ctx is not None else self.config.GGUF_N_CTX
    gguf_n_threads = n_threads if n_threads is not None else self.config.GGUF_N_THREADS
    gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else self.config.GGUF_MAX_NEW_TOKENS
    gguf_temperature = temperature if temperature is not None else self.config.GGUF_TEMPERATURE
    gguf_top_p = top_p if top_p is not None else self.config.GGUF_TOP_P

    # Generator initialization — log the effective (post-override) values
    logger.info("GGUFGenerator ์ด๊ธฐํ ์ค...")
    logger.info(f" GPU ๋ ์ด์ด: {gguf_n_gpu_layers}")
    logger.info(f" ์ปจํ ์คํธ: {gguf_n_ctx}")
    logger.info(f" ์ค๋ ๋: {gguf_n_threads}")

    self.generator = GGUFGenerator(
        model_path=self.config.GGUF_MODEL_PATH,
        n_gpu_layers=gguf_n_gpu_layers,
        n_ctx=gguf_n_ctx,
        n_threads=gguf_n_threads,
        config=self.config,
        max_new_tokens=gguf_max_new_tokens,
        temperature=gguf_temperature,
        top_p=gguf_top_p,
        system_prompt=self.config.SYSTEM_PROMPT
    )
|
| 344 |
|
|
|
|
| 358 |
|
| 359 |
def _retrieve_and_format(self, query: str) -> str:
|
| 360 |
"""๊ฒ์ ์ํ ๋ฐ ์ปจํ
์คํธ ํฌ๋งทํ
"""
|
| 361 |
+
# ๊ฒ์ ๋ชจ๋์ ๋ฐ๋ผ ๋ฌธ์ ๊ฒ์ (RAGRetriever ๋ฉ์๋ ์ฌ์ฉ)
|
| 362 |
if self.search_mode == "embedding":
|
| 363 |
docs = self.retriever.search(query, top_k=self.top_k)
|
| 364 |
elif self.search_mode == "embedding_rerank":
|