Commit
·
03c7f28
1
Parent(s):
05fc904
Test GGUF with lightweight build
Browse files- Dockerfile +30 -16
- requirements.txt +3 -0
- src/generator/generator.py.old +335 -0
- src/generator/generator_gguf.py +1 -1
- src/visualization/chatbot_app.py +103 -24
Dockerfile
CHANGED
|
@@ -1,16 +1,29 @@
|
|
| 1 |
-
# =====
|
| 2 |
-
FROM
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
| 5 |
RUN apt-get update && apt-get install -y \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
git \
|
| 7 |
curl \
|
| 8 |
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
WORKDIR /app
|
| 12 |
|
| 13 |
-
#
|
| 14 |
ENV HOME=/app
|
| 15 |
ENV STREAMLIT_SERVER_FILE_WATCHER_TYPE=none
|
| 16 |
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
|
|
@@ -22,25 +35,26 @@ ENV MKL_NUM_THREADS=1
|
|
| 22 |
ENV NUMEXPR_NUM_THREADS=1
|
| 23 |
ENV CHROMA_DB_PATH=/app/.cache/chroma_db
|
| 24 |
|
| 25 |
-
# 캐시 디렉토리 생성 및 권한 설정
|
| 26 |
RUN mkdir -p /app/.cache/huggingface /app/.streamlit && \
|
| 27 |
chmod -R 777 /app/.cache /app/.streamlit
|
| 28 |
|
| 29 |
-
# 의존성 복사
|
| 30 |
COPY requirements.txt .
|
| 31 |
|
| 32 |
-
# pip 업그레이드
|
| 33 |
-
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
|
| 34 |
-
pip install --no-cache-dir -r requirements.txt
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
#
|
|
|
|
|
|
|
|
|
|
| 40 |
RUN chmod +x /app/start.sh
|
| 41 |
|
| 42 |
-
# Streamlit 포트
|
| 43 |
EXPOSE 7860
|
| 44 |
-
|
| 45 |
-
# 시작 스크립트 실행
|
| 46 |
CMD ["/app/start.sh"]
|
|
|
|
| 1 |
+
# ===== 경량 Dockerfile (사전 빌드 llama-cpp-python 사용) =====
|
| 2 |
+
FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
|
| 3 |
|
| 4 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 5 |
+
ENV PYTHONUNBUFFERED=1
|
| 6 |
+
|
| 7 |
+
# Python 3.12 설치
|
| 8 |
RUN apt-get update && apt-get install -y \
|
| 9 |
+
software-properties-common \
|
| 10 |
+
&& add-apt-repository ppa:deadsnakes/ppa \
|
| 11 |
+
&& apt-get update && apt-get install -y \
|
| 12 |
+
python3.12 \
|
| 13 |
+
python3.12-dev \
|
| 14 |
+
python3-pip \
|
| 15 |
git \
|
| 16 |
curl \
|
| 17 |
&& rm -rf /var/lib/apt/lists/*
|
| 18 |
|
| 19 |
+
# Python 기본 설정
|
| 20 |
+
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
|
| 21 |
+
&& update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1
|
| 22 |
+
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
|
| 23 |
+
|
| 24 |
WORKDIR /app
|
| 25 |
|
| 26 |
+
# 환경변수
|
| 27 |
ENV HOME=/app
|
| 28 |
ENV STREAMLIT_SERVER_FILE_WATCHER_TYPE=none
|
| 29 |
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
|
|
|
|
| 35 |
ENV NUMEXPR_NUM_THREADS=1
|
| 36 |
ENV CHROMA_DB_PATH=/app/.cache/chroma_db
|
| 37 |
|
|
|
|
| 38 |
RUN mkdir -p /app/.cache/huggingface /app/.streamlit && \
|
| 39 |
chmod -R 777 /app/.cache /app/.streamlit
|
| 40 |
|
|
|
|
| 41 |
COPY requirements.txt .
|
| 42 |
|
| 43 |
+
# pip 업그레이드
|
| 44 |
+
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
|
|
|
|
| 45 |
|
| 46 |
+
# PyTorch CUDA 버전 설치
|
| 47 |
+
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
| 48 |
+
|
| 49 |
+
# 사전 빌드된 llama-cpp-python (CUDA) 설치
|
| 50 |
+
RUN pip install --no-cache-dir llama-cpp-python \
|
| 51 |
+
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
|
| 52 |
|
| 53 |
+
# 나머지 의존성
|
| 54 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 55 |
+
|
| 56 |
+
COPY . .
|
| 57 |
RUN chmod +x /app/start.sh
|
| 58 |
|
|
|
|
| 59 |
EXPOSE 7860
|
|
|
|
|
|
|
| 60 |
CMD ["/app/start.sh"]
|
requirements.txt
CHANGED
|
@@ -28,6 +28,9 @@ transformers>=4.44.0
|
|
| 28 |
sentence-transformers>=3.0.0
|
| 29 |
rapidfuzz>=3.9.0
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
# ===== Hugging Face Hub =====
|
| 32 |
huggingface-hub>=0.25.0
|
| 33 |
|
|
|
|
| 28 |
sentence-transformers>=3.0.0
|
| 29 |
rapidfuzz>=3.9.0
|
| 30 |
|
| 31 |
+
# ===== GGUF 로컬 모델 (추가!) =====
|
| 32 |
+
llama-cpp-python>=0.2.90
|
| 33 |
+
|
| 34 |
# ===== Hugging Face Hub =====
|
| 35 |
huggingface-hub>=0.25.0
|
| 36 |
|
src/generator/generator.py.old
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import ChatOpenAI
|
| 2 |
+
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
| 3 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 4 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
|
| 5 |
+
from langchain_core.messages import HumanMessage, AIMessage
|
| 6 |
+
from langsmith import traceable
|
| 7 |
+
import time
|
| 8 |
+
from typing import List, Dict
|
| 9 |
+
|
| 10 |
+
from src.utils.config import RAGConfig
|
| 11 |
+
from src.retriever.retriever import RAGRetriever
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class RAGPipeline:
|
| 15 |
+
"""๋ํํ RAG ํ์ดํ๋ผ์ธ - LangChain Chain ๊ธฐ๋ฐ"""
|
| 16 |
+
|
| 17 |
+
def __init__(self, config: RAGConfig = None, model: str = None, top_k: int = None):
|
| 18 |
+
"""์ด๊ธฐํ"""
|
| 19 |
+
self.config = config or RAGConfig()
|
| 20 |
+
self.model = model or self.config.LLM_MODEL_NAME
|
| 21 |
+
self.top_k = top_k or self.config.DEFAULT_TOP_K
|
| 22 |
+
|
| 23 |
+
# ๊ฒ์ ์ค์
|
| 24 |
+
self.search_mode = self.config.DEFAULT_SEARCH_MODE
|
| 25 |
+
self.alpha = self.config.DEFAULT_ALPHA
|
| 26 |
+
|
| 27 |
+
# LLM ์ด๊ธฐํ (LangChain ChatOpenAI)
|
| 28 |
+
self.llm = ChatOpenAI(
|
| 29 |
+
model=self.model,
|
| 30 |
+
openai_api_key=self.config.OPENAI_API_KEY,
|
| 31 |
+
timeout=60.0,
|
| 32 |
+
max_retries=3
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
# Retriever ์ด๊ธฐํ
|
| 36 |
+
self.retriever = RAGRetriever(config=self.config)
|
| 37 |
+
|
| 38 |
+
# ๋ํ ํ์คํ ๋ฆฌ
|
| 39 |
+
self.chat_history: List[Dict] = []
|
| 40 |
+
|
| 41 |
+
# ๋ง์ง๋ง ๊ฒ์ ๊ฒฐ๊ณผ ์ ์ฅ (sources ๋ฐํ์ฉ)
|
| 42 |
+
self._last_retrieved_docs = []
|
| 43 |
+
|
| 44 |
+
# ํ๋กฌํํธ ํ
ํ๋ฆฟ (๋ํ ํ์คํ ๋ฆฌ ํฌํจ)
|
| 45 |
+
self.prompt = ChatPromptTemplate.from_messages([
|
| 46 |
+
("system", """๋น์ ์ ๊ณต๊ณต์
์ฐฐ RFP๋ฅผ ๋ถ์ํ๋ ์
์ฐฐ๋ฉ์ดํธ ์ฌ๋ด ๋ถ์๊ฐ์
๋๋ค. ์ ๊ณต๋ ์ปจํ
์คํธ๋ง์ผ๋ก ์๊ตฌ์ฌํญยท์์ฐยท๋์ ๊ธฐ๊ดยท์ ์ถ ๋ฐฉ์ ๋ฑ์ ๊ตฌ์กฐํํด ์์ฌ๊ฒฐ์ ์ ์ง์ํ์ธ์.
|
| 47 |
+
|
| 48 |
+
# ๊ท์น
|
| 49 |
+
- ๋ต๋ณ์ ํ๊ตญ์ด๋ก ์์ฑํฉ๋๋ค.
|
| 50 |
+
- ์ปจํ
์คํธ ๋ฐ ๋ด์ฉ์ ์ถ์ธกํ์ง ์์ต๋๋ค.
|
| 51 |
+
- ์ ๋ณด๊ฐ ์์ผ๋ฉด "๋ฌธ์์์ ํด๋น ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."๋ผ๊ณ ๋ฐํ๋๋ค.
|
| 52 |
+
- ์ฌ๋ฌ ๋ฌธ์๋ฅผ ๋น๊ตํ ๋๋ ๋ฌธ์๋ณ ์ฐจ์ด๋ฅผ ํ ๋๋ ๋ชฉ๋ก์ผ๋ก ์ ๋ฆฌํฉ๋๋ค.
|
| 53 |
+
- ์ซ์์๋ ๊ฐ๋ฅํ ๋จ์๋ฅผ ํฌํจํฉ๋๋ค.
|
| 54 |
+
- ์ง์ ๋ํ ๋งฅ๋ฝ์ ๋ฐ์ํฉ๋๋ค.
|
| 55 |
+
|
| 56 |
+
# ๋ต๋ณ ํ์
|
| 57 |
+
1. ํ ์ค ์์ฝ: ์ง๋ฌธ ํต์ฌ์ ํ๋ ๋ฌธ์ฅ์ผ๋ก ์์ฑํฉ๋๋ค.
|
| 58 |
+
2. ์์ธ ๋ต๋ณ: [์๊ตฌ์ฌํญ], [๋์ ๊ธฐ๊ด], [์์ฐ], [์ ์ถ ํ์/๋ฐฉ๋ฒ], [ํ๊ฐ ๊ธฐ์ค] ๋ฑ ๋ฌธ์์์ ํ์ธ๋ ํญ๋ชฉ๋ง ์ ๋ฆฌํฉ๋๋ค.
|
| 59 |
+
3. ๊ทผ๊ฑฐ ์ ๋ณด: ์ ๋ต๋ณ์ ๊ทผ๊ฑฐ๊ฐ ๋ ๋ฌธ์ฅ์ด๋ ๋ฌธ๋จ์ ์์ฝํฉ๋๋ค.
|
| 60 |
+
4. ๋ถ์กฑํ ์ ๋ณด: ๋ฌธ์์์ ์ฐพ์ ์ ์๋ ํญ๋ชฉ์ "๋ฌธ์์์ ํ์ธ ๋ถ๊ฐ"๋ก ํ๊ธฐํฉ๋๋ค."""),
|
| 61 |
+
|
| 62 |
+
# ๋ํ ํ์คํ ๋ฆฌ
|
| 63 |
+
MessagesPlaceholder(variable_name="chat_history"),
|
| 64 |
+
|
| 65 |
+
# ํ์ฌ ์ง๋ฌธ๊ณผ ์ปจํ
์คํธ
|
| 66 |
+
("user", """# ์ปจํ
์คํธ
|
| 67 |
+
{context}
|
| 68 |
+
|
| 69 |
+
# ์ง๋ฌธ
|
| 70 |
+
{question}
|
| 71 |
+
|
| 72 |
+
์ ๊ท์น์ ๋ฐ๋ผ ๋ต๋ณํ์ธ์.""")
|
| 73 |
+
])
|
| 74 |
+
|
| 75 |
+
# Chain ๊ตฌ์ฑ
|
| 76 |
+
self.chain = (
|
| 77 |
+
{
|
| 78 |
+
"context": RunnableLambda(self._retrieve_and_format),
|
| 79 |
+
"question": RunnablePassthrough(),
|
| 80 |
+
"chat_history": RunnableLambda(lambda x: self._get_chat_history())
|
| 81 |
+
}
|
| 82 |
+
| self.prompt
|
| 83 |
+
| self.llm
|
| 84 |
+
| StrOutputParser()
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
print(f"โ
RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ ์๋ฃ")
|
| 88 |
+
print(f" - ๋ชจ๋ธ: {self.model}")
|
| 89 |
+
print(f" - ๊ธฐ๋ณธ top_k: {self.top_k}")
|
| 90 |
+
print(f" - ๊ฒ์ ๋ชจ๋: {self.search_mode}")
|
| 91 |
+
|
| 92 |
+
def _get_chat_history(self) -> List:
|
| 93 |
+
"""๋ํ ํ์คํ ๋ฆฌ๋ฅผ LangChain ๋ฉ์์ง ํ์์ผ๋ก ๋ณํ"""
|
| 94 |
+
messages = []
|
| 95 |
+
for msg in self.chat_history:
|
| 96 |
+
if msg["role"] == "user":
|
| 97 |
+
messages.append(HumanMessage(content=msg["content"]))
|
| 98 |
+
else:
|
| 99 |
+
messages.append(AIMessage(content=msg["content"]))
|
| 100 |
+
return messages
|
| 101 |
+
|
| 102 |
+
def _retrieve_and_format(self, query: str) -> str:
|
| 103 |
+
"""๊ฒ์ ์ํ ๋ฐ ์ปจํ
์คํธ ํฌ๋งทํ
"""
|
| 104 |
+
# ๊ฒ์ ๋ชจ๋์ ๋ฐ๋ผ ๋ฌธ์ ๊ฒ์
|
| 105 |
+
if self.search_mode == "embedding":
|
| 106 |
+
docs = self.retriever.search(query, top_k=self.top_k)
|
| 107 |
+
elif self.search_mode == "hybrid":
|
| 108 |
+
docs = self.retriever.hybrid_search(query, top_k=self.top_k, alpha=self.alpha)
|
| 109 |
+
elif self.search_mode == "hybrid_rerank":
|
| 110 |
+
docs = self.retriever.hybrid_search_with_rerank(
|
| 111 |
+
query, top_k=self.top_k, alpha=self.alpha
|
| 112 |
+
)
|
| 113 |
+
else:
|
| 114 |
+
docs = self.retriever.search(query, top_k=self.top_k)
|
| 115 |
+
|
| 116 |
+
# ๋ง์ง๋ง ๊ฒ์ ๊ฒฐ๊ณผ ์ ์ฅ
|
| 117 |
+
self._last_retrieved_docs = docs
|
| 118 |
+
|
| 119 |
+
# ์ปจํ
์คํธ ํฌ๋งทํ
|
| 120 |
+
return self._format_context(docs)
|
| 121 |
+
|
| 122 |
+
def _format_context(self, retrieved_docs: list) -> str:
|
| 123 |
+
"""๊ฒ์๋ ๋ฌธ์๋ฅผ ์ปจํ
์คํธ๋ก ๋ณํ"""
|
| 124 |
+
if not retrieved_docs:
|
| 125 |
+
return "๊ด๋ จ ๋ฌธ์๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."
|
| 126 |
+
|
| 127 |
+
context_parts = []
|
| 128 |
+
for i, doc in enumerate(retrieved_docs, 1):
|
| 129 |
+
context_parts.append(f"[๋ฌธ์ {i}]\n{doc['content']}\n")
|
| 130 |
+
return "\n".join(context_parts)
|
| 131 |
+
|
| 132 |
+
def _format_sources(self, retrieved_docs: list) -> list:
|
| 133 |
+
"""๊ฒ์๋ ๋ฌธ์๋ฅผ sources ํ์์ผ๋ก ๋ณํ"""
|
| 134 |
+
sources = []
|
| 135 |
+
for doc in retrieved_docs:
|
| 136 |
+
source_info = {
|
| 137 |
+
'content': doc['content'],
|
| 138 |
+
'metadata': doc['metadata'],
|
| 139 |
+
'filename': doc.get('filename', 'N/A'),
|
| 140 |
+
'organization': doc.get('organization', 'N/A')
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
# ๊ฒ์ ๋ชจ๋์ ๋ฐ๋ผ ์ ์ ํ๋๊ฐ ๋ค๋ฆ
|
| 144 |
+
if 'rerank_score' in doc:
|
| 145 |
+
source_info['score'] = doc['rerank_score']
|
| 146 |
+
source_info['score_type'] = 'rerank'
|
| 147 |
+
elif 'hybrid_score' in doc:
|
| 148 |
+
source_info['score'] = doc['hybrid_score']
|
| 149 |
+
source_info['score_type'] = 'hybrid'
|
| 150 |
+
elif 'relevance_score' in doc:
|
| 151 |
+
source_info['score'] = doc['relevance_score']
|
| 152 |
+
source_info['score_type'] = 'embedding'
|
| 153 |
+
else:
|
| 154 |
+
source_info['score'] = 0
|
| 155 |
+
source_info['score_type'] = 'unknown'
|
| 156 |
+
|
| 157 |
+
sources.append(source_info)
|
| 158 |
+
return sources
|
| 159 |
+
|
| 160 |
+
@traceable(
|
| 161 |
+
name="RAG_Generate_Answer",
|
| 162 |
+
metadata={"component": "generator", "version": "2.0"}
|
| 163 |
+
)
|
| 164 |
+
def generate_answer(
|
| 165 |
+
self,
|
| 166 |
+
query: str,
|
| 167 |
+
top_k: int = None,
|
| 168 |
+
search_mode: str = None,
|
| 169 |
+
alpha: float = None
|
| 170 |
+
) -> dict:
|
| 171 |
+
"""
|
| 172 |
+
๋ต๋ณ ์์ฑ (Chain ๊ธฐ๋ฐ)
|
| 173 |
+
|
| 174 |
+
Args:
|
| 175 |
+
query: ์ง๋ฌธ
|
| 176 |
+
top_k: ๊ฒ์ํ ๋ฌธ์ ์
|
| 177 |
+
search_mode: ๊ฒ์ ๋ชจ๋ ("embedding", "hybrid", "hybrid_rerank")
|
| 178 |
+
alpha: ์๋ฒ ๋ฉ ๊ฐ์ค์น (0~1)
|
| 179 |
+
|
| 180 |
+
Returns:
|
| 181 |
+
dict: answer, sources, search_mode, usage
|
| 182 |
+
"""
|
| 183 |
+
try:
|
| 184 |
+
start_time = time.time()
|
| 185 |
+
|
| 186 |
+
# ํ๋ผ๋ฏธํฐ ์ค์
|
| 187 |
+
if top_k is not None:
|
| 188 |
+
self.top_k = top_k
|
| 189 |
+
if search_mode is not None:
|
| 190 |
+
self.search_mode = search_mode
|
| 191 |
+
if alpha is not None:
|
| 192 |
+
self.alpha = alpha
|
| 193 |
+
|
| 194 |
+
# Chain ์คํ
|
| 195 |
+
answer = self.chain.invoke(query)
|
| 196 |
+
|
| 197 |
+
elapsed_time = time.time() - start_time
|
| 198 |
+
|
| 199 |
+
# ๋ํ ํ์คํ ๋ฆฌ์ ์ถ๊ฐ
|
| 200 |
+
self.chat_history.append({"role": "user", "content": query})
|
| 201 |
+
self.chat_history.append({"role": "assistant", "content": answer})
|
| 202 |
+
|
| 203 |
+
# ํ ํฐ ์ฌ์ฉ๋ ์ถ์ (LangChain์์๋ ์ง์ ์ ๊ทผ ์ด๋ ค์)
|
| 204 |
+
estimated_tokens = len(query.split()) + len(answer.split()) * 2
|
| 205 |
+
|
| 206 |
+
return {
|
| 207 |
+
'answer': answer,
|
| 208 |
+
'sources': self._format_sources(self._last_retrieved_docs),
|
| 209 |
+
'search_mode': self.search_mode,
|
| 210 |
+
'elapsed_time': elapsed_time,
|
| 211 |
+
'usage': {
|
| 212 |
+
'total_tokens': estimated_tokens,
|
| 213 |
+
'prompt_tokens': 0,
|
| 214 |
+
'completion_tokens': 0
|
| 215 |
+
}
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
except Exception as e:
|
| 219 |
+
print(f"โ ๋ต๋ณ ์์ฑ ์คํจ: {e}")
|
| 220 |
+
import traceback
|
| 221 |
+
traceback.print_exc()
|
| 222 |
+
raise RuntimeError(f"๋ต๋ณ ์์ฑ ์คํจ: {str(e)}") from e
|
| 223 |
+
|
| 224 |
+
def chat(self, query: str) -> str:
|
| 225 |
+
"""
|
| 226 |
+
๊ฐ๋จํ ๋ํ ์ธํฐํ์ด์ค
|
| 227 |
+
|
| 228 |
+
Args:
|
| 229 |
+
query: ์ง๋ฌธ
|
| 230 |
+
|
| 231 |
+
Returns:
|
| 232 |
+
str: ๋ต๋ณ ํ
์คํธ๋ง ๋ฐํ
|
| 233 |
+
"""
|
| 234 |
+
result = self.generate_answer(query)
|
| 235 |
+
return result['answer']
|
| 236 |
+
|
| 237 |
+
def clear_history(self):
|
| 238 |
+
"""๋ํ ํ์คํ ๋ฆฌ ์ด๊ธฐํ"""
|
| 239 |
+
self.chat_history = []
|
| 240 |
+
print("๐๏ธ ๋ํ ํ์คํ ๋ฆฌ๊ฐ ์ด๊ธฐํ๋์์ต๋๋ค.")
|
| 241 |
+
|
| 242 |
+
def get_history(self) -> List[Dict]:
|
| 243 |
+
"""๋ํ ํ์คํ ๋ฆฌ ๋ฐํ"""
|
| 244 |
+
return self.chat_history.copy()
|
| 245 |
+
|
| 246 |
+
def set_search_config(self, search_mode: str = None, top_k: int = None, alpha: float = None):
|
| 247 |
+
"""๊ฒ์ ์ค์ ๋ณ๊ฒฝ"""
|
| 248 |
+
if search_mode is not None:
|
| 249 |
+
self.search_mode = search_mode
|
| 250 |
+
if top_k is not None:
|
| 251 |
+
self.top_k = top_k
|
| 252 |
+
if alpha is not None:
|
| 253 |
+
self.alpha = alpha
|
| 254 |
+
|
| 255 |
+
print(f"๐ง ๊ฒ์ ์ค์ ๋ณ๊ฒฝ: mode={self.search_mode}, top_k={self.top_k}, alpha={self.alpha}")
|
| 256 |
+
|
| 257 |
+
def print_result(self, result: dict, query: str = None):
|
| 258 |
+
"""๊ฒฐ๊ณผ ์ถ๋ ฅ"""
|
| 259 |
+
print("\n" + "="*60)
|
| 260 |
+
if query:
|
| 261 |
+
print(f"์ง๋ฌธ: {query}")
|
| 262 |
+
print(f"๊ฒ์ ๋ชจ๋: {result.get('search_mode', 'N/A')}")
|
| 263 |
+
if 'elapsed_time' in result:
|
| 264 |
+
print(f"์์ ์๊ฐ: {result['elapsed_time']:.2f}์ด")
|
| 265 |
+
print("="*60)
|
| 266 |
+
print(f"\n๐ฌ ๋ต๋ณ:\n{result['answer']}")
|
| 267 |
+
print(f"\n๐ ์ฐธ๊ณ ๋ฌธ์ ({len(result['sources'])}๊ฐ):")
|
| 268 |
+
for i, source in enumerate(result['sources'], 1):
|
| 269 |
+
score = source.get('score', 0)
|
| 270 |
+
score_type = source.get('score_type', '')
|
| 271 |
+
print(f" [{i}] {source['filename']}")
|
| 272 |
+
print(f" ์ ์: {score:.3f} ({score_type})")
|
| 273 |
+
print("="*60)
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ๋ํํ ์คํ
|
| 277 |
+
def interactive_mode():
|
| 278 |
+
"""๋ํํ ๋ชจ๋ ์คํ"""
|
| 279 |
+
print("=" * 60)
|
| 280 |
+
print("๋ํํ RAG ์์คํ
์ด๊ธฐํ ์ค...")
|
| 281 |
+
print("=" * 60)
|
| 282 |
+
|
| 283 |
+
config = RAGConfig()
|
| 284 |
+
pipeline = RAGPipeline(config=config)
|
| 285 |
+
|
| 286 |
+
print("\n" + "=" * 60)
|
| 287 |
+
print("๋ํํ ๋ชจ๋ ์์")
|
| 288 |
+
print("๋ช
๋ น์ด: 'quit' (์ข
๋ฃ), 'clear' (ํ์คํ ๋ฆฌ ์ด๊ธฐํ), 'mode' (๊ฒ์๋ชจ๋ ๋ณ๊ฒฝ)")
|
| 289 |
+
print("=" * 60)
|
| 290 |
+
|
| 291 |
+
while True:
|
| 292 |
+
user_query = input("\n์ง๋ฌธ: ").strip()
|
| 293 |
+
|
| 294 |
+
if not user_query:
|
| 295 |
+
continue
|
| 296 |
+
|
| 297 |
+
if user_query.lower() in ['quit', 'exit', '์ข
๋ฃ', 'q']:
|
| 298 |
+
print("์์คํ
์ ์ข
๋ฃํฉ๋๋ค.")
|
| 299 |
+
break
|
| 300 |
+
|
| 301 |
+
if user_query.lower() == 'clear':
|
| 302 |
+
pipeline.clear_history()
|
| 303 |
+
continue
|
| 304 |
+
|
| 305 |
+
if user_query.lower() == 'mode':
|
| 306 |
+
print("\n๊ฒ์ ๋ชจ๋ ์ ํ:")
|
| 307 |
+
print("1. embedding - ์๋ฒ ๋ฉ ๊ฒ์")
|
| 308 |
+
print("2. hybrid - BM25 + ์๋ฒ ๋ฉ")
|
| 309 |
+
print("3. hybrid_rerank - Hybrid + Re-ranker (๊ถ์ฅ)")
|
| 310 |
+
choice = input("์ ํ (1/2/3): ").strip()
|
| 311 |
+
modes = {'1': 'embedding', '2': 'hybrid', '3': 'hybrid_rerank'}
|
| 312 |
+
if choice in modes:
|
| 313 |
+
pipeline.set_search_config(search_mode=modes[choice])
|
| 314 |
+
continue
|
| 315 |
+
|
| 316 |
+
try:
|
| 317 |
+
result = pipeline.generate_answer(query=user_query)
|
| 318 |
+
pipeline.print_result(result, user_query)
|
| 319 |
+
|
| 320 |
+
# ์์ค ์ถ๋ ฅ ์ฌ๋ถ
|
| 321 |
+
show_source = input("\n์ฐธ์กฐ ๋ฌธ์ ์์ธ ๋ณด๊ธฐ? (y/n): ").strip().lower()
|
| 322 |
+
if show_source == 'y':
|
| 323 |
+
for i, source in enumerate(result['sources'], 1):
|
| 324 |
+
print(f"\n{'='*40}")
|
| 325 |
+
print(f"[๋ฌธ์ {i}] {source['filename']}")
|
| 326 |
+
print(f"๋ฐ์ฃผ๊ธฐ๊ด: {source['organization']}")
|
| 327 |
+
print(f"๋ด์ฉ:\n{source['content'][:500]}...")
|
| 328 |
+
|
| 329 |
+
except Exception as e:
|
| 330 |
+
print(f"โ ์ค๋ฅ ๋ฐ์: {e}")
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# ์ฌ์ฉ ์์
|
| 334 |
+
if __name__ == "__main__":
|
| 335 |
+
interactive_mode()
|
src/generator/generator_gguf.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
from typing import Optional, Dict, Any, List
|
| 3 |
import logging
|
| 4 |
import time
|
|
|
|
| 1 |
+
from llama_cpp import Llama
|
| 2 |
from typing import Optional, Dict, Any, List
|
| 3 |
import logging
|
| 4 |
import time
|
src/visualization/chatbot_app.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
๊ณต๊ณต๊ธฐ๊ด ์ฌ์
์ ์์ RAG ์ฑ๋ด
|
| 3 |
|
| 4 |
๊ธฐ๋ฅ:
|
| 5 |
-
- ๋ชจ๋ธ ์ ํ (API/๋ก์ปฌ)
|
| 6 |
- Query Router (๊ฒ์ vs ์ง์ ๋ต๋ณ)
|
| 7 |
- RAG ๊ธฐ๋ฐ ์ง์์๋ต (Hybrid Search + Re-ranker)
|
| 8 |
- ์กฐ๊ฑด๋ถ ์ฐธ๊ณ ๋ฌธ์ ํ์
|
|
@@ -111,6 +111,14 @@ st.markdown("""
|
|
| 111 |
margin-top: 0.5rem;
|
| 112 |
border-left: 3px solid #ff9800;
|
| 113 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
</style>
|
| 115 |
""", unsafe_allow_html=True)
|
| 116 |
|
|
@@ -132,15 +140,47 @@ if 'show_routing_info' not in st.session_state:
|
|
| 132 |
# ===== RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ =====
|
| 133 |
@st.cache_resource
|
| 134 |
def initialize_rag(model_type):
|
| 135 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
config = RAGConfig()
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
except Exception as e:
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
# ===== ๋ต๋ณ ์์ฑ =====
|
|
@@ -156,12 +196,14 @@ def generate_answer(query: str, top_k: int = 10, search_mode: str = "hybrid_rera
|
|
| 156 |
return result
|
| 157 |
|
| 158 |
except Exception as e:
|
|
|
|
|
|
|
| 159 |
return {
|
| 160 |
-
'answer': f"โ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
|
| 161 |
'sources': [],
|
| 162 |
-
'used_retrieval': False,
|
| 163 |
'search_mode': search_mode,
|
| 164 |
-
'routing_info': None,
|
| 165 |
'usage': {'total_tokens': 0, 'prompt_tokens': 0, 'completion_tokens': 0}
|
| 166 |
}
|
| 167 |
|
|
@@ -173,8 +215,8 @@ def display_message(
|
|
| 173 |
sources: list = None,
|
| 174 |
usage: dict = None,
|
| 175 |
search_mode: str = None,
|
| 176 |
-
used_retrieval: bool = None,
|
| 177 |
-
routing_info: dict = None
|
| 178 |
):
|
| 179 |
"""
|
| 180 |
๋ฉ์์ง๋ฅผ ํ๋ฉด์ ํ์
|
|
@@ -231,7 +273,7 @@ def display_message(
|
|
| 231 |
'hybrid': '๐ Hybrid Search',
|
| 232 |
'embedding_rerank': '๐ ์๋ฒ ๋ฉ + Re-ranker',
|
| 233 |
'embedding': '๐ ์๋ฒ ๋ฉ ๊ฒ์',
|
| 234 |
-
'direct': '๐ฌ Direct (๊ฒ์ ์์)'
|
| 235 |
}
|
| 236 |
st.markdown(f"""
|
| 237 |
<div class="search-mode-info">
|
|
@@ -299,14 +341,33 @@ def main():
|
|
| 299 |
model_type = st.selectbox(
|
| 300 |
"์์ฑ ๋ชจ๋ธ ์ ํ",
|
| 301 |
options=[
|
| 302 |
-
"API ๋ชจ๋ธ (GPT)"
|
|
|
|
| 303 |
],
|
| 304 |
index=0,
|
| 305 |
-
help="OpenAI API
|
| 306 |
)
|
| 307 |
|
| 308 |
-
#
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
st.markdown("---")
|
| 312 |
|
|
@@ -344,7 +405,7 @@ def main():
|
|
| 344 |
"๊ฒ์ํ ๋ฌธ์ ๊ฐ์ (Top-K)",
|
| 345 |
min_value=1,
|
| 346 |
max_value=20,
|
| 347 |
-
value=10,
|
| 348 |
help="๊ฒ์ํ ๋ฌธ์ ๊ฐ์"
|
| 349 |
)
|
| 350 |
|
|
@@ -380,7 +441,7 @@ def main():
|
|
| 380 |
st.rerun()
|
| 381 |
|
| 382 |
if st.button("๐พ ๋ํ ๋ค์ด๋ก๋", use_container_width=True):
|
| 383 |
-
if len(st.session_state.conv_manager) > 0:
|
| 384 |
json_str = st.session_state.conv_manager.export_to_json()
|
| 385 |
|
| 386 |
st.download_button(
|
|
@@ -416,20 +477,38 @@ def main():
|
|
| 416 |
if (st.session_state.rag_pipeline is None or
|
| 417 |
st.session_state.model_type != model_type):
|
| 418 |
|
| 419 |
-
with st.spinner(f"๐ {model_type} ์ด๊ธฐํ ์ค..."):
|
| 420 |
rag, error, rag_type = initialize_rag(model_type)
|
| 421 |
|
| 422 |
if error:
|
| 423 |
-
st.error(f"โ RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ
|
|
|
|
|
|
|
|
|
|
|
|
|
| 424 |
st.info("""
|
| 425 |
### ๐ก ํด๊ฒฐ ๋ฐฉ๋ฒ
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
1. ChromaDB๊ฐ ์์ฑ๋์๋์ง ํ์ธ:
|
| 428 |
```bash
|
| 429 |
python main.py --step embed
|
| 430 |
```
|
| 431 |
|
| 432 |
-
2. OpenAI API
|
| 433 |
```bash
|
| 434 |
# .env ํ์ผ
|
| 435 |
OPENAI_API_KEY=your-key-here
|
|
@@ -449,7 +528,7 @@ pip install rank-bm25 sentence-transformers
|
|
| 449 |
# ===== ๋ํ ํ์คํ ๋ฆฌ ํ์ =====
|
| 450 |
st.markdown("---")
|
| 451 |
|
| 452 |
-
if len(st.session_state.conv_manager) == 0:
|
| 453 |
st.info("""
|
| 454 |
### ๐ ํ์ํฉ๋๋ค!
|
| 455 |
|
|
@@ -470,8 +549,8 @@ pip install rank-bm25 sentence-transformers
|
|
| 470 |
sources=msg.get('sources'),
|
| 471 |
usage=msg.get('usage'),
|
| 472 |
search_mode=msg.get('search_mode'),
|
| 473 |
-
used_retrieval=msg.get('used_retrieval'),
|
| 474 |
-
routing_info=msg.get('routing_info')
|
| 475 |
)
|
| 476 |
|
| 477 |
# ===== ์ง๋ฌธ ์
๋ ฅ =====
|
|
|
|
| 2 |
๊ณต๊ณต๊ธฐ๊ด ์ฌ์
์ ์์ RAG ์ฑ๋ด
|
| 3 |
|
| 4 |
๊ธฐ๋ฅ:
|
| 5 |
+
- ๋ชจ๋ธ ์ ํ (API/๋ก์ปฌ GGUF)
|
| 6 |
- Query Router (๊ฒ์ vs ์ง์ ๋ต๋ณ)
|
| 7 |
- RAG ๊ธฐ๋ฐ ์ง์์๋ต (Hybrid Search + Re-ranker)
|
| 8 |
- ์กฐ๊ฑด๋ถ ์ฐธ๊ณ ๋ฌธ์ ํ์
|
|
|
|
| 111 |
margin-top: 0.5rem;
|
| 112 |
border-left: 3px solid #ff9800;
|
| 113 |
}
|
| 114 |
+
.model-info {
|
| 115 |
+
background-color: #f3e5f5;
|
| 116 |
+
padding: 0.8rem 1rem;
|
| 117 |
+
border-radius: 0.3rem;
|
| 118 |
+
font-size: 0.9rem;
|
| 119 |
+
margin: 0.5rem 0;
|
| 120 |
+
border-left: 3px solid #9c27b0;
|
| 121 |
+
}
|
| 122 |
</style>
|
| 123 |
""", unsafe_allow_html=True)
|
| 124 |
|
|
|
|
| 140 |
# ===== RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ =====
|
| 141 |
@st.cache_resource
|
| 142 |
def initialize_rag(model_type):
|
| 143 |
+
"""
|
| 144 |
+
RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ
|
| 145 |
+
|
| 146 |
+
Args:
|
| 147 |
+
model_type: "API ๋ชจ๋ธ (GPT)" ๋๋ "๋ก์ปฌ ๋ชจ๋ธ (GGUF)"
|
| 148 |
+
|
| 149 |
+
Returns:
|
| 150 |
+
(rag_pipeline, error_message, model_name)
|
| 151 |
+
"""
|
| 152 |
try:
|
| 153 |
config = RAGConfig()
|
| 154 |
+
|
| 155 |
+
if model_type == "API ๋ชจ๋ธ (GPT)":
|
| 156 |
+
# API ๋ชจ๋ธ ์ฌ์ฉ
|
| 157 |
+
from src.generator.generator import RAGPipeline
|
| 158 |
+
rag = RAGPipeline(config=config)
|
| 159 |
+
return rag, None, "OpenAI GPT"
|
| 160 |
+
|
| 161 |
+
elif model_type == "๋ก์ปฌ ๋ชจ๋ธ (GGUF)":
|
| 162 |
+
# GGUF ๋ชจ๋ธ ์ฌ์ฉ
|
| 163 |
+
from src.generator.generator_gguf import GGUFRAGPipeline
|
| 164 |
+
|
| 165 |
+
# T4 GPU ์ต์ ์ค์
|
| 166 |
+
rag = GGUFRAGPipeline(
|
| 167 |
+
config=config,
|
| 168 |
+
n_gpu_layers=35, # T4์์ ์ ์ฒด ๋ ์ด์ด GPU ์ฌ์ฉ
|
| 169 |
+
n_ctx=2048, # ์ปจํ
์คํธ ๊ธธ์ด
|
| 170 |
+
n_threads=4, # CPU ์ค๋ ๋ (GPU ์ฌ์ฉ ์ ๋ฎ๊ฒ)
|
| 171 |
+
max_new_tokens=512, # ์ต๋ ์์ฑ ํ ํฐ
|
| 172 |
+
temperature=0.7,
|
| 173 |
+
top_p=0.9
|
| 174 |
+
)
|
| 175 |
+
return rag, None, "Llama-3-Ko-8B (GGUF)"
|
| 176 |
+
|
| 177 |
+
else:
|
| 178 |
+
return None, f"์ ์ ์๋ ๋ชจ๋ธ ํ์
: {model_type}", None
|
| 179 |
|
| 180 |
except Exception as e:
|
| 181 |
+
import traceback
|
| 182 |
+
error_detail = traceback.format_exc()
|
| 183 |
+
return None, f"{str(e)}\n\n{error_detail}", None
|
| 184 |
|
| 185 |
|
| 186 |
# ===== ๋ต๋ณ ์์ฑ =====
|
|
|
|
| 196 |
return result
|
| 197 |
|
| 198 |
except Exception as e:
|
| 199 |
+
import traceback
|
| 200 |
+
error_detail = traceback.format_exc()
|
| 201 |
return {
|
| 202 |
+
'answer': f"โ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}\n\n{error_detail}",
|
| 203 |
'sources': [],
|
| 204 |
+
'used_retrieval': False,
|
| 205 |
'search_mode': search_mode,
|
| 206 |
+
'routing_info': None,
|
| 207 |
'usage': {'total_tokens': 0, 'prompt_tokens': 0, 'completion_tokens': 0}
|
| 208 |
}
|
| 209 |
|
|
|
|
| 215 |
sources: list = None,
|
| 216 |
usage: dict = None,
|
| 217 |
search_mode: str = None,
|
| 218 |
+
used_retrieval: bool = None,
|
| 219 |
+
routing_info: dict = None
|
| 220 |
):
|
| 221 |
"""
|
| 222 |
๋ฉ์์ง๋ฅผ ํ๋ฉด์ ํ์
|
|
|
|
| 273 |
'hybrid': '๐ Hybrid Search',
|
| 274 |
'embedding_rerank': '๐ ์๋ฒ ๋ฉ + Re-ranker',
|
| 275 |
'embedding': '๐ ์๋ฒ ๋ฉ ๊ฒ์',
|
| 276 |
+
'direct': '๐ฌ Direct (๊ฒ์ ์์)'
|
| 277 |
}
|
| 278 |
st.markdown(f"""
|
| 279 |
<div class="search-mode-info">
|
|
|
|
| 341 |
model_type = st.selectbox(
|
| 342 |
"์์ฑ ๋ชจ๋ธ ์ ํ",
|
| 343 |
options=[
|
| 344 |
+
"API ๋ชจ๋ธ (GPT)",
|
| 345 |
+
"๋ก์ปฌ ๋ชจ๋ธ (GGUF)"
|
| 346 |
],
|
| 347 |
index=0,
|
| 348 |
+
help="OpenAI API ๋๋ ๋ก์ปฌ GGUF ๋ชจ๋ธ ์ ํ"
|
| 349 |
)
|
| 350 |
|
| 351 |
+
# ๋ชจ๋ธ๋ณ ์ ๋ณด ํ์
|
| 352 |
+
if model_type == "API ๋ชจ๋ธ (GPT)":
|
| 353 |
+
st.markdown("""
|
| 354 |
+
<div class="model-info">
|
| 355 |
+
๐ <b>OpenAI GPT ๋ชจ๋ธ</b><br>
|
| 356 |
+
โข ๋น ๋ฅด๊ณ ์์ ์ <br>
|
| 357 |
+
โข API ํค ํ์<br>
|
| 358 |
+
โข ๋น์ฉ ๋ฐ์ (ํ ํฐ๋น)
|
| 359 |
+
</div>
|
| 360 |
+
""", unsafe_allow_html=True)
|
| 361 |
+
else:
|
| 362 |
+
st.markdown("""
|
| 363 |
+
<div class="model-info">
|
| 364 |
+
๐ฅ๏ธ <b>Llama-3-Ko-8B (GGUF)</b><br>
|
| 365 |
+
โข T4 GPU ๊ฐ์<br>
|
| 366 |
+
โข ๋ก์ปฌ ์คํ (๋ฌด๋ฃ)<br>
|
| 367 |
+
โข ์ด๊ธฐ ๋ก๋ฉ ์๊ฐ ์์<br>
|
| 368 |
+
โข 35๊ฐ ๋ ์ด์ด GPU ์ฌ์ฉ
|
| 369 |
+
</div>
|
| 370 |
+
""", unsafe_allow_html=True)
|
| 371 |
|
| 372 |
st.markdown("---")
|
| 373 |
|
|
|
|
| 405 |
"๊ฒ์ํ ๋ฌธ์ ๊ฐ์ (Top-K)",
|
| 406 |
min_value=1,
|
| 407 |
max_value=20,
|
| 408 |
+
value=10,
|
| 409 |
help="๊ฒ์ํ ๋ฌธ์ ๊ฐ์"
|
| 410 |
)
|
| 411 |
|
|
|
|
| 441 |
st.rerun()
|
| 442 |
|
| 443 |
if st.button("๐พ ๋ํ ๋ค์ด๋ก๋", use_container_width=True):
|
| 444 |
+
if len(st.session_state.conv_manager) > 0:
|
| 445 |
json_str = st.session_state.conv_manager.export_to_json()
|
| 446 |
|
| 447 |
st.download_button(
|
|
|
|
| 477 |
if (st.session_state.rag_pipeline is None or
|
| 478 |
st.session_state.model_type != model_type):
|
| 479 |
|
| 480 |
+
with st.spinner(f"๐ {model_type} ์ด๊ธฐํ ์ค... (GGUF ๋ชจ๋ธ์ 1~2๋ถ ์์๋ ์ ์์ต๋๋ค)"):
|
| 481 |
rag, error, rag_type = initialize_rag(model_type)
|
| 482 |
|
| 483 |
if error:
|
| 484 |
+
st.error(f"โ RAG ํ์ดํ๋ผ์ธ ์ด๊ธฐํ ์คํจ")
|
| 485 |
+
|
| 486 |
+
with st.expander("๐ ์๋ฌ ์์ธ ์ ๋ณด"):
|
| 487 |
+
st.code(error)
|
| 488 |
+
|
| 489 |
st.info("""
|
| 490 |
### ๐ก ํด๊ฒฐ ๋ฐฉ๋ฒ
|
| 491 |
|
| 492 |
+
**GGUF ๋ชจ๋ธ ์คํจ ์:**
|
| 493 |
+
1. llama-cpp-python ์ค์น ํ์ธ:
|
| 494 |
+
```bash
|
| 495 |
+
pip install llama-cpp-python
|
| 496 |
+
```
|
| 497 |
+
|
| 498 |
+
2. GGUF ๋ชจ๋ธ ํ์ผ ํ์ธ:
|
| 499 |
+
- config.yaml์ GGUF_MODEL_PATH ๋๋
|
| 500 |
+
- MODEL_HUB_REPO ์ค์ ํ์ธ
|
| 501 |
+
|
| 502 |
+
3. GPU ๋ฉ๋ชจ๋ฆฌ ๋ถ์กฑ ์:
|
| 503 |
+
- n_gpu_layers ๊ฐ ๊ฐ์ (35 โ 20)
|
| 504 |
+
|
| 505 |
+
**API ๋ชจ๋ธ ์คํจ ์:**
|
| 506 |
1. ChromaDB๊ฐ ์์ฑ๋์๋์ง ํ์ธ:
|
| 507 |
```bash
|
| 508 |
python main.py --step embed
|
| 509 |
```
|
| 510 |
|
| 511 |
+
2. OpenAI API ํค ํ์ธ:
|
| 512 |
```bash
|
| 513 |
# .env ํ์ผ
|
| 514 |
OPENAI_API_KEY=your-key-here
|
|
|
|
| 528 |
# ===== ๋ํ ํ์คํ ๋ฆฌ ํ์ =====
|
| 529 |
st.markdown("---")
|
| 530 |
|
| 531 |
+
if len(st.session_state.conv_manager) == 0:
|
| 532 |
st.info("""
|
| 533 |
### ๐ ํ์ํฉ๋๋ค!
|
| 534 |
|
|
|
|
| 549 |
sources=msg.get('sources'),
|
| 550 |
usage=msg.get('usage'),
|
| 551 |
search_mode=msg.get('search_mode'),
|
| 552 |
+
used_retrieval=msg.get('used_retrieval'),
|
| 553 |
+
routing_info=msg.get('routing_info')
|
| 554 |
)
|
| 555 |
|
| 556 |
# ===== ์ง๋ฌธ ์
๋ ฅ =====
|