Dongjin1203 committed on
Commit
03c7f28
ยท
1 Parent(s): 05fc904

Test GGUF with lightweight build

Browse files
Dockerfile CHANGED
@@ -1,16 +1,29 @@
1
- # ===== Python 3.12 Dockerfile =====
2
- FROM python:3.12-slim
3
 
4
- # ์‹œ์Šคํ…œ ํŒจํ‚ค์ง€ ์„ค์น˜
 
 
 
5
  RUN apt-get update && apt-get install -y \
 
 
 
 
 
 
6
  git \
7
  curl \
8
  && rm -rf /var/lib/apt/lists/*
9
 
10
- # ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ
 
 
 
 
11
  WORKDIR /app
12
 
13
- # ===== ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์„ค์ • =====
14
  ENV HOME=/app
15
  ENV STREAMLIT_SERVER_FILE_WATCHER_TYPE=none
16
  ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
@@ -22,25 +35,26 @@ ENV MKL_NUM_THREADS=1
22
  ENV NUMEXPR_NUM_THREADS=1
23
  ENV CHROMA_DB_PATH=/app/.cache/chroma_db
24
 
25
- # ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ ๋ฐ ๊ถŒํ•œ ์„ค์ •
26
  RUN mkdir -p /app/.cache/huggingface /app/.streamlit && \
27
  chmod -R 777 /app/.cache /app/.streamlit
28
 
29
- # ์˜์กด์„ฑ ๋ณต์‚ฌ
30
  COPY requirements.txt .
31
 
32
- # pip ์—…๊ทธ๋ ˆ์ด๋“œ & ์˜์กด์„ฑ ์„ค์น˜
33
- RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
34
- pip install --no-cache-dir -r requirements.txt
35
 
36
- # ํ”„๋กœ์ ํŠธ ํŒŒ์ผ ๋ณต์‚ฌ
37
- COPY . .
 
 
 
 
38
 
39
- # ์‹œ์ž‘ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰ ๊ถŒํ•œ ๋ถ€์—ฌ
 
 
 
40
  RUN chmod +x /app/start.sh
41
 
42
- # Streamlit ํฌํŠธ
43
  EXPOSE 7860
44
-
45
- # ์‹œ์ž‘ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
46
  CMD ["/app/start.sh"]
 
1
+ # ===== ๊ฒฝ๋Ÿ‰ Dockerfile (์‚ฌ์ „ ๋นŒ๋“œ llama-cpp-python ์‚ฌ์šฉ) =====
2
+ FROM nvidia/cuda:12.1.0-runtime-ubuntu22.04
3
 
4
+ ENV DEBIAN_FRONTEND=noninteractive
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ # Python 3.12 ์„ค์น˜
8
  RUN apt-get update && apt-get install -y \
9
+ software-properties-common \
10
+ && add-apt-repository ppa:deadsnakes/ppa \
11
+ && apt-get update && apt-get install -y \
12
+ python3.12 \
13
+ python3.12-dev \
14
+ python3-pip \
15
  git \
16
  curl \
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
+ # Python ๊ธฐ๋ณธ ์„ค์ •
20
+ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
21
+ && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1
22
+ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12
23
+
24
  WORKDIR /app
25
 
26
+ # ํ™˜๊ฒฝ๋ณ€์ˆ˜
27
  ENV HOME=/app
28
  ENV STREAMLIT_SERVER_FILE_WATCHER_TYPE=none
29
  ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
 
35
  ENV NUMEXPR_NUM_THREADS=1
36
  ENV CHROMA_DB_PATH=/app/.cache/chroma_db
37
 
 
38
  RUN mkdir -p /app/.cache/huggingface /app/.streamlit && \
39
  chmod -R 777 /app/.cache /app/.streamlit
40
 
 
41
  COPY requirements.txt .
42
 
43
+ # pip ์—…๊ทธ๋ ˆ์ด๋“œ
44
+ RUN pip install --no-cache-dir --upgrade pip setuptools wheel
 
45
 
46
+ # PyTorch CUDA ๋ฒ„์ „ ์„ค์น˜
47
+ RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
48
+
49
+ # ์‚ฌ์ „ ๋นŒ๋“œ๋œ llama-cpp-python (CUDA) ์„ค์น˜
50
+ RUN pip install --no-cache-dir llama-cpp-python \
51
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
52
 
53
+ # ๋‚˜๋จธ์ง€ ์˜์กด์„ฑ
54
+ RUN pip install --no-cache-dir -r requirements.txt
55
+
56
+ COPY . .
57
  RUN chmod +x /app/start.sh
58
 
 
59
  EXPOSE 7860
 
 
60
  CMD ["/app/start.sh"]
requirements.txt CHANGED
@@ -28,6 +28,9 @@ transformers>=4.44.0
28
  sentence-transformers>=3.0.0
29
  rapidfuzz>=3.9.0
30
 
 
 
 
31
  # ===== Hugging Face Hub =====
32
  huggingface-hub>=0.25.0
33
 
 
28
  sentence-transformers>=3.0.0
29
  rapidfuzz>=3.9.0
30
 
31
+ # ===== GGUF ๋กœ์ปฌ ๋ชจ๋ธ (์ถ”๊ฐ€!) =====
32
+ llama-cpp-python>=0.2.90
33
+
34
  # ===== Hugging Face Hub =====
35
  huggingface-hub>=0.25.0
36
 
src/generator/generator.py.old ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import ChatOpenAI
2
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables import RunnablePassthrough, RunnableLambda
5
+ from langchain_core.messages import HumanMessage, AIMessage
6
+ from langsmith import traceable
7
+ import time
8
+ from typing import List, Dict
9
+
10
+ from src.utils.config import RAGConfig
11
+ from src.retriever.retriever import RAGRetriever
12
+
13
+
14
class RAGPipeline:
    """Conversational RAG pipeline built on a LangChain chain.

    Wires a retriever, a chat prompt (with rolling chat history) and a
    ChatOpenAI model into a single runnable chain, and keeps the last
    retrieval result around so answers can be returned with sources.
    """

    def __init__(self, config: RAGConfig = None, model: str = None, top_k: int = None):
        """Initialize the pipeline.

        Args:
            config: RAG configuration; a default RAGConfig() is built when omitted.
            model: LLM model name; falls back to config.LLM_MODEL_NAME.
            top_k: default number of documents to retrieve; falls back to
                config.DEFAULT_TOP_K.
        """
        self.config = config or RAGConfig()
        self.model = model or self.config.LLM_MODEL_NAME
        self.top_k = top_k or self.config.DEFAULT_TOP_K

        # Retrieval settings (mutable via set_search_config / generate_answer).
        self.search_mode = self.config.DEFAULT_SEARCH_MODE
        self.alpha = self.config.DEFAULT_ALPHA

        # LangChain ChatOpenAI client.
        self.llm = ChatOpenAI(
            model=self.model,
            openai_api_key=self.config.OPENAI_API_KEY,
            timeout=60.0,
            max_retries=3,
        )

        # Document retriever.
        self.retriever = RAGRetriever(config=self.config)

        # Conversation history as [{"role": ..., "content": ...}, ...].
        self.chat_history: List[Dict] = []

        # Last retrieval result, kept so generate_answer can expose sources.
        self._last_retrieved_docs = []

        # Prompt template: system rules, prior turns, then the current
        # question together with the retrieved context.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """당신은 공공입찰 RFP를 분석하는 입찰메이트 사내 분석가입니다. 제공된 컨텍스트만으로 요구사항·예산·대상 기관·제출 방식 등을 구조화해 의사결정을 지원하세요.

# 규칙
- 답변은 한국어로 작성합니다.
- 컨텍스트 밖 내용을 추측하지 않습니다.
- 정보가 없으면 "문서에서 해당 정보를 찾을 수 없습니다."라고 밝힙니다.
- 여러 문서를 비교할 때는 문서별 차이를 표 또는 목록으로 정리합니다.
- 숫자에는 가능한 단위를 포함합니다.
- 직전 대화 맥락을 반영합니다.

# 답변 형식
1. 한 줄 요약: 질문 핵심을 한두 문장으로 작성합니다.
2. 상세 답변: [요구사항], [대상 기관], [예산], [제출 형식/방법], [평가 기준] 등 문서에서 확인된 항목만 정리합니다.
3. 근거 정보: 위 답변의 근거가 된 문장이나 문단을 요약합니다.
4. 부족한 정보: 문서에서 찾을 수 없는 항목은 "문서에서 확인 불가"로 표기합니다."""),
            MessagesPlaceholder(variable_name="chat_history"),
            ("user", """# 컨텍스트
{context}

# 질문
{question}

위 규칙에 따라 답변하세요."""),
        ])

        # Chain: fan the query out into context / question / history, then
        # prompt -> LLM -> plain-string output.
        self.chain = (
            {
                "context": RunnableLambda(self._retrieve_and_format),
                "question": RunnablePassthrough(),
                "chat_history": RunnableLambda(lambda _: self._get_chat_history()),
            }
            | self.prompt
            | self.llm
            | StrOutputParser()
        )

        print(f"✅ RAG 파이프라인 초기화 완료")
        print(f"   - 모델: {self.model}")
        print(f"   - 기본 top_k: {self.top_k}")
        print(f"   - 검색 모드: {self.search_mode}")

    def _get_chat_history(self) -> List:
        """Convert the stored history dicts into LangChain message objects."""
        return [
            HumanMessage(content=turn["content"]) if turn["role"] == "user"
            else AIMessage(content=turn["content"])
            for turn in self.chat_history
        ]

    def _retrieve_and_format(self, query: str) -> str:
        """Run retrieval for `query` and return the formatted context string.

        Also caches the raw hits in `_last_retrieved_docs` for source display.
        """
        if self.search_mode == "hybrid":
            hits = self.retriever.hybrid_search(query, top_k=self.top_k, alpha=self.alpha)
        elif self.search_mode == "hybrid_rerank":
            hits = self.retriever.hybrid_search_with_rerank(
                query, top_k=self.top_k, alpha=self.alpha
            )
        else:
            # "embedding" and any unrecognized mode fall back to plain
            # embedding search, matching the original behavior.
            hits = self.retriever.search(query, top_k=self.top_k)

        self._last_retrieved_docs = hits
        return self._format_context(hits)

    def _format_context(self, retrieved_docs: list) -> str:
        """Render retrieved documents as a numbered context block."""
        if not retrieved_docs:
            return "관련 문서를 찾을 수 없습니다."

        return "\n".join(
            f"[문서 {idx}]\n{doc['content']}\n"
            for idx, doc in enumerate(retrieved_docs, 1)
        )

    def _format_sources(self, retrieved_docs: list) -> list:
        """Convert retrieved docs into the `sources` structure for callers.

        Picks the score field by priority (rerank > hybrid > embedding) and
        tags each entry with the matching score_type.
        """
        sources = []
        for doc in retrieved_docs:
            entry = {
                'content': doc['content'],
                'metadata': doc['metadata'],
                'filename': doc.get('filename', 'N/A'),
                'organization': doc.get('organization', 'N/A'),
            }

            # Score field differs by search mode; take the first present.
            for key, label in (
                ('rerank_score', 'rerank'),
                ('hybrid_score', 'hybrid'),
                ('relevance_score', 'embedding'),
            ):
                if key in doc:
                    entry['score'] = doc[key]
                    entry['score_type'] = label
                    break
            else:
                entry['score'] = 0
                entry['score_type'] = 'unknown'

            sources.append(entry)
        return sources

    @traceable(
        name="RAG_Generate_Answer",
        metadata={"component": "generator", "version": "2.0"}
    )
    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """Generate an answer via the chain.

        Args:
            query: user question.
            top_k: number of documents to retrieve (overrides and persists).
            search_mode: "embedding", "hybrid" or "hybrid_rerank"
                (overrides and persists).
            alpha: embedding weight in [0, 1] (overrides and persists).

        Returns:
            dict with answer, sources, search_mode, elapsed_time and usage.

        Raises:
            RuntimeError: wraps any failure during generation.
        """
        try:
            started = time.time()

            # NOTE(review): per-call overrides mutate instance state, so they
            # persist into later calls — kept as-is to preserve behavior.
            if top_k is not None:
                self.top_k = top_k
            if search_mode is not None:
                self.search_mode = search_mode
            if alpha is not None:
                self.alpha = alpha

            answer = self.chain.invoke(query)

            elapsed = time.time() - started

            # Record the turn in the conversation history.
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Rough token estimate only; exact usage is not exposed here.
            estimated_tokens = len(query.split()) + len(answer.split()) * 2

            return {
                'answer': answer,
                'sources': self._format_sources(self._last_retrieved_docs),
                'search_mode': self.search_mode,
                'elapsed_time': elapsed,
                'usage': {
                    'total_tokens': estimated_tokens,
                    'prompt_tokens': 0,
                    'completion_tokens': 0
                }
            }

        except Exception as e:
            print(f"❌ 답변 생성 실패: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"답변 생성 실패: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Minimal conversational interface: return the answer text only."""
        return self.generate_answer(query)['answer']

    def clear_history(self):
        """Reset the conversation history."""
        self.chat_history = []
        print("🗑️ 대화 히스토리가 초기화되었습니다.")

    def get_history(self) -> List[Dict]:
        """Return a shallow copy of the conversation history."""
        return self.chat_history.copy()

    def set_search_config(self, search_mode: str = None, top_k: int = None, alpha: float = None):
        """Update retrieval settings; only non-None values are applied."""
        if search_mode is not None:
            self.search_mode = search_mode
        if top_k is not None:
            self.top_k = top_k
        if alpha is not None:
            self.alpha = alpha

        print(f"🔧 검색 설정 변경: mode={self.search_mode}, top_k={self.top_k}, alpha={self.alpha}")

    def print_result(self, result: dict, query: str = None):
        """Pretty-print a generate_answer() result to stdout."""
        print("\n" + "=" * 60)
        if query:
            print(f"질문: {query}")
        print(f"검색 모드: {result.get('search_mode', 'N/A')}")
        if 'elapsed_time' in result:
            print(f"소요 시간: {result['elapsed_time']:.2f}초")
        print("=" * 60)
        print(f"\n💬 답변:\n{result['answer']}")
        print(f"\n📚 참고 문서 ({len(result['sources'])}개):")
        for idx, source in enumerate(result['sources'], 1):
            score = source.get('score', 0)
            score_type = source.get('score_type', '')
            print(f"  [{idx}] {source['filename']}")
            print(f"      점수: {score:.3f} ({score_type})")
        print("=" * 60)
274
+
275
+
276
+ # ๋Œ€ํ™”ํ˜• ์‹คํ–‰
277
def interactive_mode():
    """Run an interactive console loop around RAGPipeline.

    Commands: 'quit' (exit), 'clear' (reset history), 'mode' (switch
    search mode). Any other input is treated as a question.
    """
    print("=" * 60)
    print("대화형 RAG 시스템 초기화 중...")
    print("=" * 60)

    pipeline = RAGPipeline(config=RAGConfig())

    print("\n" + "=" * 60)
    print("대화형 모드 시작")
    print("명령어: 'quit' (종료), 'clear' (히스토리 초기화), 'mode' (검색모드 변경)")
    print("=" * 60)

    while True:
        user_query = input("\n질문: ").strip()

        # Skip empty input.
        if not user_query:
            continue

        command = user_query.lower()

        if command in ('quit', 'exit', '종료', 'q'):
            print("시스템을 종료합니다.")
            break

        if command == 'clear':
            pipeline.clear_history()
            continue

        if command == 'mode':
            print("\n검색 모드 선택:")
            print("1. embedding - 임베딩 검색")
            print("2. hybrid - BM25 + 임베딩")
            print("3. hybrid_rerank - Hybrid + Re-ranker (권장)")
            choice = input("선택 (1/2/3): ").strip()
            mode_by_choice = {'1': 'embedding', '2': 'hybrid', '3': 'hybrid_rerank'}
            if choice in mode_by_choice:
                pipeline.set_search_config(search_mode=mode_by_choice[choice])
            continue

        try:
            result = pipeline.generate_answer(query=user_query)
            pipeline.print_result(result, user_query)

            # Optionally dump the referenced documents.
            if input("\n참조 문서 상세 보기? (y/n): ").strip().lower() == 'y':
                for idx, source in enumerate(result['sources'], 1):
                    print(f"\n{'='*40}")
                    print(f"[문서 {idx}] {source['filename']}")
                    print(f"발주기관: {source['organization']}")
                    print(f"내용:\n{source['content'][:500]}...")

        except Exception as e:
            print(f"❌ 오류 발생: {e}")


if __name__ == "__main__":
    interactive_mode()
src/generator/generator_gguf.py CHANGED
@@ -1,4 +1,4 @@
1
- # from llama_cpp import Llama
2
  from typing import Optional, Dict, Any, List
3
  import logging
4
  import time
 
1
+ from llama_cpp import Llama
2
  from typing import Optional, Dict, Any, List
3
  import logging
4
  import time
src/visualization/chatbot_app.py CHANGED
@@ -2,7 +2,7 @@
2
  ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ RAG ์ฑ—๋ด‡
3
 
4
  ๊ธฐ๋Šฅ:
5
- - ๋ชจ๋ธ ์„ ํƒ (API/๋กœ์ปฌ)
6
  - Query Router (๊ฒ€์ƒ‰ vs ์ง์ ‘ ๋‹ต๋ณ€)
7
  - RAG ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต (Hybrid Search + Re-ranker)
8
  - ์กฐ๊ฑด๋ถ€ ์ฐธ๊ณ  ๋ฌธ์„œ ํ‘œ์‹œ
@@ -111,6 +111,14 @@ st.markdown("""
111
  margin-top: 0.5rem;
112
  border-left: 3px solid #ff9800;
113
  }
 
 
 
 
 
 
 
 
114
  </style>
115
  """, unsafe_allow_html=True)
116
 
@@ -132,15 +140,47 @@ if 'show_routing_info' not in st.session_state:
132
  # ===== RAG ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” =====
133
  @st.cache_resource
134
  def initialize_rag(model_type):
135
- """RAG ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” (API ๋ชจ๋ธ ์ „์šฉ)"""
 
 
 
 
 
 
 
 
136
  try:
137
  config = RAGConfig()
138
- from src.generator.generator import RAGPipeline
139
- rag = RAGPipeline(config=config)
140
- return rag, None, "API"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  except Exception as e:
143
- return None, str(e), None
 
 
144
 
145
 
146
  # ===== ๋‹ต๋ณ€ ์ƒ์„ฑ =====
@@ -156,12 +196,14 @@ def generate_answer(query: str, top_k: int = 10, search_mode: str = "hybrid_rera
156
  return result
157
 
158
  except Exception as e:
 
 
159
  return {
160
- 'answer': f"โŒ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
161
  'sources': [],
162
- 'used_retrieval': False, # โ† ์ถ”๊ฐ€
163
  'search_mode': search_mode,
164
- 'routing_info': None, # โ† ์ถ”๊ฐ€
165
  'usage': {'total_tokens': 0, 'prompt_tokens': 0, 'completion_tokens': 0}
166
  }
167
 
@@ -173,8 +215,8 @@ def display_message(
173
  sources: list = None,
174
  usage: dict = None,
175
  search_mode: str = None,
176
- used_retrieval: bool = None, # โ† ์‹ ๊ทœ
177
- routing_info: dict = None # โ† ์‹ ๊ทœ
178
  ):
179
  """
180
  ๋ฉ”์‹œ์ง€๋ฅผ ํ™”๋ฉด์— ํ‘œ์‹œ
@@ -231,7 +273,7 @@ def display_message(
231
  'hybrid': '๐Ÿ”€ Hybrid Search',
232
  'embedding_rerank': '๐Ÿ“Š ์ž„๋ฒ ๋”ฉ + Re-ranker',
233
  'embedding': '๐Ÿ“Š ์ž„๋ฒ ๋”ฉ ๊ฒ€์ƒ‰',
234
- 'direct': '๐Ÿ’ฌ Direct (๊ฒ€์ƒ‰ ์—†์Œ)' # โ† ์ถ”๊ฐ€
235
  }
236
  st.markdown(f"""
237
  <div class="search-mode-info">
@@ -299,14 +341,33 @@ def main():
299
  model_type = st.selectbox(
300
  "์ƒ์„ฑ ๋ชจ๋ธ ์„ ํƒ",
301
  options=[
302
- "API ๋ชจ๋ธ (GPT)"
 
303
  ],
304
  index=0,
305
- help="OpenAI API ์‚ฌ์šฉ (๋น ๋ฅด๊ณ  ์•ˆ์ •์ )"
306
  )
307
 
308
- # ๋ชจ๋ธ ์ •๋ณด ํ‘œ์‹œ
309
- st.info("๐ŸŒ OpenAI GPT ๋ชจ๋ธ ์‚ฌ์šฉ ์ค‘")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  st.markdown("---")
312
 
@@ -344,7 +405,7 @@ def main():
344
  "๊ฒ€์ƒ‰ํ•  ๋ฌธ์„œ ๊ฐœ์ˆ˜ (Top-K)",
345
  min_value=1,
346
  max_value=20,
347
- value=10, # ๊ธฐ๋ณธ๊ฐ’
348
  help="๊ฒ€์ƒ‰ํ•  ๋ฌธ์„œ ๊ฐœ์ˆ˜"
349
  )
350
 
@@ -380,7 +441,7 @@ def main():
380
  st.rerun()
381
 
382
  if st.button("๐Ÿ’พ ๋Œ€ํ™” ๋‹ค์šด๋กœ๋“œ", use_container_width=True):
383
- if len(st.session_state.conv_manager) > 0: # โœ… conv_manager ์‚ฌ์šฉ
384
  json_str = st.session_state.conv_manager.export_to_json()
385
 
386
  st.download_button(
@@ -416,20 +477,38 @@ def main():
416
  if (st.session_state.rag_pipeline is None or
417
  st.session_state.model_type != model_type):
418
 
419
- with st.spinner(f"๐Ÿ”„ {model_type} ์ดˆ๊ธฐํ™” ์ค‘..."):
420
  rag, error, rag_type = initialize_rag(model_type)
421
 
422
  if error:
423
- st.error(f"โŒ RAG ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {error}")
 
 
 
 
424
  st.info("""
425
  ### ๐Ÿ’ก ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•
426
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  1. ChromaDB๊ฐ€ ์ƒ์„ฑ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ:
428
  ```bash
429
  python main.py --step embed
430
  ```
431
 
432
- 2. OpenAI API ํ‚ค๊ฐ€ ์„ค์ •๋˜์—ˆ๋Š”์ง€ ํ™•์ธ:
433
  ```bash
434
  # .env ํŒŒ์ผ
435
  OPENAI_API_KEY=your-key-here
@@ -449,7 +528,7 @@ pip install rank-bm25 sentence-transformers
449
  # ===== ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ํ‘œ์‹œ =====
450
  st.markdown("---")
451
 
452
- if len(st.session_state.conv_manager) == 0: # โœ… conv_manager ์‚ฌ์šฉ
453
  st.info("""
454
  ### ๐Ÿ‘‹ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
455
 
@@ -470,8 +549,8 @@ pip install rank-bm25 sentence-transformers
470
  sources=msg.get('sources'),
471
  usage=msg.get('usage'),
472
  search_mode=msg.get('search_mode'),
473
- used_retrieval=msg.get('used_retrieval'), # โ† ์‹ ๊ทœ
474
- routing_info=msg.get('routing_info') # โ† ์‹ ๊ทœ
475
  )
476
 
477
  # ===== ์งˆ๋ฌธ ์ž…๋ ฅ =====
 
2
  ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ RAG ์ฑ—๋ด‡
3
 
4
  ๊ธฐ๋Šฅ:
5
+ - ๋ชจ๋ธ ์„ ํƒ (API/๋กœ์ปฌ GGUF)
6
  - Query Router (๊ฒ€์ƒ‰ vs ์ง์ ‘ ๋‹ต๋ณ€)
7
  - RAG ๊ธฐ๋ฐ˜ ์งˆ์˜์‘๋‹ต (Hybrid Search + Re-ranker)
8
  - ์กฐ๊ฑด๋ถ€ ์ฐธ๊ณ  ๋ฌธ์„œ ํ‘œ์‹œ
 
111
  margin-top: 0.5rem;
112
  border-left: 3px solid #ff9800;
113
  }
114
+ .model-info {
115
+ background-color: #f3e5f5;
116
+ padding: 0.8rem 1rem;
117
+ border-radius: 0.3rem;
118
+ font-size: 0.9rem;
119
+ margin: 0.5rem 0;
120
+ border-left: 3px solid #9c27b0;
121
+ }
122
  </style>
123
  """, unsafe_allow_html=True)
124
 
 
140
  # ===== RAG ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” =====
141
@st.cache_resource
def initialize_rag(model_type):
    """Initialize the RAG pipeline for the selected model backend.

    Args:
        model_type: "API 모델 (GPT)" or "로컬 모델 (GGUF)".

    Returns:
        Tuple of (rag_pipeline, error_message, model_name); on failure the
        pipeline and model name are None and error_message carries the
        exception plus its traceback.
    """
    try:
        config = RAGConfig()

        if model_type == "API 모델 (GPT)":
            # OpenAI-API-backed pipeline.
            from src.generator.generator import RAGPipeline
            return RAGPipeline(config=config), None, "OpenAI GPT"

        if model_type == "로컬 모델 (GGUF)":
            # Local GGUF pipeline via llama-cpp-python.
            from src.generator.generator_gguf import GGUFRAGPipeline

            # Settings tuned for a T4 GPU.
            pipeline = GGUFRAGPipeline(
                config=config,
                n_gpu_layers=35,      # offload all layers on a T4 — TODO confirm layer count for the model
                n_ctx=2048,           # context window
                n_threads=4,          # keep CPU threads low when the GPU does the work
                max_new_tokens=512,   # generation cap
                temperature=0.7,
                top_p=0.9,
            )
            return pipeline, None, "Llama-3-Ko-8B (GGUF)"

        return None, f"알 수 없는 모델 타입: {model_type}", None

    except Exception as e:
        import traceback
        error_detail = traceback.format_exc()
        return None, f"{str(e)}\n\n{error_detail}", None
184
 
185
 
186
  # ===== ๋‹ต๋ณ€ ์ƒ์„ฑ =====
 
196
  return result
197
 
198
  except Exception as e:
199
+ import traceback
200
+ error_detail = traceback.format_exc()
201
  return {
202
+ 'answer': f"โŒ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}\n\n{error_detail}",
203
  'sources': [],
204
+ 'used_retrieval': False,
205
  'search_mode': search_mode,
206
+ 'routing_info': None,
207
  'usage': {'total_tokens': 0, 'prompt_tokens': 0, 'completion_tokens': 0}
208
  }
209
 
 
215
  sources: list = None,
216
  usage: dict = None,
217
  search_mode: str = None,
218
+ used_retrieval: bool = None,
219
+ routing_info: dict = None
220
  ):
221
  """
222
  ๋ฉ”์‹œ์ง€๋ฅผ ํ™”๋ฉด์— ํ‘œ์‹œ
 
273
  'hybrid': '๐Ÿ”€ Hybrid Search',
274
  'embedding_rerank': '๐Ÿ“Š ์ž„๋ฒ ๋”ฉ + Re-ranker',
275
  'embedding': '๐Ÿ“Š ์ž„๋ฒ ๋”ฉ ๊ฒ€์ƒ‰',
276
+ 'direct': '๐Ÿ’ฌ Direct (๊ฒ€์ƒ‰ ์—†์Œ)'
277
  }
278
  st.markdown(f"""
279
  <div class="search-mode-info">
 
341
  model_type = st.selectbox(
342
  "์ƒ์„ฑ ๋ชจ๋ธ ์„ ํƒ",
343
  options=[
344
+ "API ๋ชจ๋ธ (GPT)",
345
+ "๋กœ์ปฌ ๋ชจ๋ธ (GGUF)"
346
  ],
347
  index=0,
348
+ help="OpenAI API ๋˜๋Š” ๋กœ์ปฌ GGUF ๋ชจ๋ธ ์„ ํƒ"
349
  )
350
 
351
+ # ๋ชจ๋ธ๋ณ„ ์ •๋ณด ํ‘œ์‹œ
352
+ if model_type == "API ๋ชจ๋ธ (GPT)":
353
+ st.markdown("""
354
+ <div class="model-info">
355
+ ๐ŸŒ <b>OpenAI GPT ๋ชจ๋ธ</b><br>
356
+ โ€ข ๋น ๋ฅด๊ณ  ์•ˆ์ •์ <br>
357
+ โ€ข API ํ‚ค ํ•„์š”<br>
358
+ โ€ข ๋น„์šฉ ๋ฐœ์ƒ (ํ† ํฐ๋‹น)
359
+ </div>
360
+ """, unsafe_allow_html=True)
361
+ else:
362
+ st.markdown("""
363
+ <div class="model-info">
364
+ ๐Ÿ–ฅ๏ธ <b>Llama-3-Ko-8B (GGUF)</b><br>
365
+ โ€ข T4 GPU ๊ฐ€์†<br>
366
+ โ€ข ๋กœ์ปฌ ์‹คํ–‰ (๋ฌด๋ฃŒ)<br>
367
+ โ€ข ์ดˆ๊ธฐ ๋กœ๋”ฉ ์‹œ๊ฐ„ ์†Œ์š”<br>
368
+ โ€ข 35๊ฐœ ๋ ˆ์ด์–ด GPU ์‚ฌ์šฉ
369
+ </div>
370
+ """, unsafe_allow_html=True)
371
 
372
  st.markdown("---")
373
 
 
405
  "๊ฒ€์ƒ‰ํ•  ๋ฌธ์„œ ๊ฐœ์ˆ˜ (Top-K)",
406
  min_value=1,
407
  max_value=20,
408
+ value=10,
409
  help="๊ฒ€์ƒ‰ํ•  ๋ฌธ์„œ ๊ฐœ์ˆ˜"
410
  )
411
 
 
441
  st.rerun()
442
 
443
  if st.button("๐Ÿ’พ ๋Œ€ํ™” ๋‹ค์šด๋กœ๋“œ", use_container_width=True):
444
+ if len(st.session_state.conv_manager) > 0:
445
  json_str = st.session_state.conv_manager.export_to_json()
446
 
447
  st.download_button(
 
477
  if (st.session_state.rag_pipeline is None or
478
  st.session_state.model_type != model_type):
479
 
480
+ with st.spinner(f"๐Ÿ”„ {model_type} ์ดˆ๊ธฐํ™” ์ค‘... (GGUF ๋ชจ๋ธ์€ 1~2๋ถ„ ์†Œ์š”๋  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)"):
481
  rag, error, rag_type = initialize_rag(model_type)
482
 
483
  if error:
484
+ st.error(f"โŒ RAG ํŒŒ์ดํ”„๋ผ์ธ ์ดˆ๊ธฐํ™” ์‹คํŒจ")
485
+
486
+ with st.expander("๐Ÿ” ์—๋Ÿฌ ์ƒ์„ธ ์ •๋ณด"):
487
+ st.code(error)
488
+
489
  st.info("""
490
  ### ๐Ÿ’ก ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•
491
 
492
+ **GGUF ๋ชจ๋ธ ์‹คํŒจ ์‹œ:**
493
+ 1. llama-cpp-python ์„ค์น˜ ํ™•์ธ:
494
+ ```bash
495
+ pip install llama-cpp-python
496
+ ```
497
+
498
+ 2. GGUF ๋ชจ๋ธ ํŒŒ์ผ ํ™•์ธ:
499
+ - config.yaml์˜ GGUF_MODEL_PATH ๋˜๋Š”
500
+ - MODEL_HUB_REPO ์„ค์ • ํ™•์ธ
501
+
502
+ 3. GPU ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ ์‹œ:
503
+ - n_gpu_layers ๊ฐ’ ๊ฐ์†Œ (35 โ†’ 20)
504
+
505
+ **API ๋ชจ๋ธ ์‹คํŒจ ์‹œ:**
506
  1. ChromaDB๊ฐ€ ์ƒ์„ฑ๋˜์—ˆ๋Š”์ง€ ํ™•์ธ:
507
  ```bash
508
  python main.py --step embed
509
  ```
510
 
511
+ 2. OpenAI API ํ‚ค ํ™•์ธ:
512
  ```bash
513
  # .env ํŒŒ์ผ
514
  OPENAI_API_KEY=your-key-here
 
528
  # ===== ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ํ‘œ์‹œ =====
529
  st.markdown("---")
530
 
531
+ if len(st.session_state.conv_manager) == 0:
532
  st.info("""
533
  ### ๐Ÿ‘‹ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
534
 
 
549
  sources=msg.get('sources'),
550
  usage=msg.get('usage'),
551
  search_mode=msg.get('search_mode'),
552
+ used_retrieval=msg.get('used_retrieval'),
553
+ routing_info=msg.get('routing_info')
554
  )
555
 
556
  # ===== ์งˆ๋ฌธ ์ž…๋ ฅ =====