Dongjin1203 committed
Commit 9630ae8 · 0 Parent(s)

Initial commit - RFPilot experiment

.dockerignore ADDED
@@ -0,0 +1,7 @@
+ __pycache__/
+ *.pyc
+ .git
+ .env
+ .venv
+ *.log
+ EOF
.gitignore ADDED
@@ -0,0 +1,234 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Models & DB
+ chroma_db/
+ models/
+ *.gguf
+ .cache/
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ env/
+ venv/
+
+ # IDE
+ .vscode/
+ .idea/
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Env
+ .env
+ .env.local
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
+
+ WORKDIR /app
+
+ # Install Python
+ RUN apt-get update && apt-get install -y \
+     python3.10 \
+     python3-pip \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN pip install --no-cache-dir --upgrade pip
+
+ # Copy requirements.txt
+ COPY requirements.txt .
+
+ # llama-cpp-python (prebuilt CUDA wheel)
+ RUN pip install --no-cache-dir \
+     llama-cpp-python==0.2.90 \
+     --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+
+ # Remaining packages
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the source
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["python3", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Dongjin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,30 @@
+ ---
+ title: RFPilot Model Comparison
+ emoji: 🔬
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ app_port: 7860
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # QLoRA_RAG_test
+ ---
+ This project is a follow-up study to the RFP document-summarization RAG chatbot project.
+
+ ## Problem
+ In the original service architecture, a RAG system was layered on top of the fine-tuned model, but we never verified whether that causes overfitting, or what effect it actually has.
+
+ ## Experimental procedure
+
+ - Prepare the QLoRA fine-tuned model.
+ - Prepare the original model without fine-tuning.
+ - Build an evaluation dataset.
+ - Test three conditions separately: fine-tuning only, RAG only without fine-tuning, and both combined.
+ - Review the results.
+
+ ---
+
+ ## Results
app.py ADDED
@@ -0,0 +1,284 @@
+ """
+ Experiment app for the Hugging Face Space.
+
+ Uses Gradio to run the experiment from a web UI and inspect the results.
+ """
+
+ import gradio as gr
+ import os
+ import json
+ import pandas as pd
+ from pathlib import Path
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+
+ # Put the project root on the import path
+ import sys
+ sys.path.insert(0, str(Path(__file__).parent))
+
+ from src.eval_dataset import EvalDataset
+ from src.compare_models import ModelComparison
+ from src.analyze_results import ResultAnalyzer
+
+
+ class ExperimentApp:
+     """Holds the experiment state behind the Gradio UI."""
+
+     def __init__(self):
+         self.experiment = None
+         self.latest_result_file = None
+
+     def setup_environment(self, api_key: str) -> str:
+         """Set the environment variables the pipelines expect."""
+         if not api_key:
+             return "❌ Please enter an OpenAI API key."
+
+         os.environ['OPENAI_API_KEY'] = api_key
+         os.environ['USE_MODEL_HUB'] = 'true'
+         os.environ['GGUF_N_GPU_LAYERS'] = '35'
+
+         return "✅ Environment configured!"
+
+     def run_experiment(
+         self,
+         api_key: str,
+         distribution: str,
+         progress=gr.Progress()
+     ) -> tuple:
+         """Run the full comparison experiment."""
+         try:
+             # Environment setup
+             setup_msg = self.setup_environment(api_key)
+             if "❌" in setup_msg:
+                 return setup_msg, None, None
+
+             progress(0.1, desc="Environment configured...")
+
+             # Load the config
+             from src.utils.config import RAGConfig
+             config = RAGConfig()
+
+             progress(0.2, desc="Initializing the experiment...")
+
+             # Initialize the experiment
+             self.experiment = ModelComparison(
+                 config=config,
+                 output_dir="./experiments/results"
+             )
+
+             progress(0.3, desc="Loading models... (takes 3-5 min)")
+
+             # Load the three pipelines
+             self.experiment.load_models()
+
+             progress(0.5, desc="Running the experiment... (takes 10-20 min)")
+
+             # Run the experiment
+             results = self.experiment.run_experiment(
+                 distribution=distribution.lower(),
+                 save_results=True
+             )
+
+             progress(0.9, desc="Analyzing results...")
+
+             # Find the most recent result file
+             result_files = sorted(
+                 Path("./experiments/results").glob("results_*.json"),
+                 reverse=True
+             )
+             self.latest_result_file = str(result_files[0]) if result_files else None
+
+             # Build the text summary
+             summary = self._generate_summary(results)
+
+             # Build the results DataFrame
+             df = self._results_to_dataframe(results)
+
+             progress(1.0, desc="Done!")
+
+             return "✅ Experiment finished!", summary, df
+
+         except Exception as e:
+             return f"❌ Experiment failed: {str(e)}", None, None
+
+     def _generate_summary(self, results: dict) -> str:
+         """Build a plain-text summary of the results."""
+         summary = "=" * 60 + "\n"
+         summary += "Experiment result summary\n"
+         summary += "=" * 60 + "\n\n"
+
+         metadata = results['metadata']
+         summary += f"Timestamp: {metadata['timestamp']}\n"
+         summary += f"Distribution: {metadata['distribution']}\n"
+         summary += f"Models: {', '.join(metadata['models'])}\n"
+         summary += f"Total queries: {metadata['total_queries']}\n\n"
+
+         # Per-distribution summary
+         for dist_type, dist_results in results['results'].items():
+             summary += f"\n{'='*60}\n"
+             summary += f"{dist_type.upper()}\n"
+             summary += f"{'='*60}\n\n"
+
+             # Per-model statistics
+             model_stats = {}
+             for result in dist_results:
+                 model = result['model']
+                 if model not in model_stats:
+                     model_stats[model] = []
+                 model_stats[model].append(result)
+
+             for model, model_results in model_stats.items():
+                 success_count = sum(1 for r in model_results if r['success'])
+                 avg_time = sum(r['elapsed_time'] for r in model_results if r['success']) / max(success_count, 1)
+
+                 summary += f"[{model}]\n"
+                 summary += f"  Success: {success_count}/{len(model_results)}\n"
+                 summary += f"  Avg time: {avg_time:.3f}s\n\n"
+
+         return summary
+
+     def _results_to_dataframe(self, results: dict) -> pd.DataFrame:
+         """Flatten the results into a DataFrame."""
+         all_rows = []
+
+         for dist_type, dist_results in results['results'].items():
+             for result in dist_results:
+                 row = {
+                     'distribution': dist_type,
+                     'model': result['model'],
+                     'query': result['query'],
+                     'success': result['success'],
+                     'elapsed_time': result['elapsed_time'],
+                     'total_tokens': result.get('usage', {}).get('total_tokens', 0)
+                 }
+                 all_rows.append(row)
+
+         return pd.DataFrame(all_rows)
+
+     def analyze_results(self) -> tuple:
+         """Generate the analysis plots for the latest run."""
+         if not self.latest_result_file:
+             return "❌ Run the experiment first.", None, None, None, None
+
+         try:
+             analyzer = ResultAnalyzer(self.latest_result_file)
+
+             # Generate the plots
+             analyzer.plot_time_comparison()
+             analyzer.plot_token_comparison()
+             analyzer.plot_rag_usage()
+             analyzer.plot_overfitting_analysis()
+
+             # Plot file paths
+             analysis_dir = Path(self.latest_result_file).parent / "analysis"
+
+             time_plot = str(analysis_dir / "time_comparison.png")
+             token_plot = str(analysis_dir / "token_comparison.png")
+             rag_plot = str(analysis_dir / "rag_usage.png")
+             overfitting_plot = str(analysis_dir / "overfitting_analysis.png")
+
+             return (
+                 "✅ Analysis finished!",
+                 time_plot if Path(time_plot).exists() else None,
+                 token_plot if Path(token_plot).exists() else None,
+                 rag_plot if Path(rag_plot).exists() else None,
+                 overfitting_plot if Path(overfitting_plot).exists() else None
+             )
+
+         except Exception as e:
+             return f"❌ Analysis failed: {str(e)}", None, None, None, None
+
+
+ # Build the Gradio interface
+ def create_interface():
+     """Build the Gradio interface."""
+     app = ExperimentApp()
+
+     with gr.Blocks(title="RFPilot Model Comparison") as demo:
+         gr.Markdown("""
+         # 🔬 RFPilot Model Comparison
+
+         Compares the performance of three models (QLoRA+RAG, QLoRA only, Base+RAG).
+
+         ⚠️ **Note**: the experiment takes a long time to run (10-20 min).
+         """)
+
+         with gr.Tab("🚀 Run experiment"):
+             api_key_input = gr.Textbox(
+                 label="OpenAI API Key",
+                 type="password",
+                 placeholder="sk-..."
+             )
+
+             distribution_input = gr.Radio(
+                 choices=["All", "In", "Out"],
+                 value="All",
+                 label="Distribution"
+             )
+
+             run_btn = gr.Button("Start experiment", variant="primary")
+
+             status_output = gr.Textbox(label="Status", lines=2)
+             summary_output = gr.Textbox(label="Summary", lines=20)
+             results_output = gr.Dataframe(label="Results")
+
+             run_btn.click(
+                 fn=app.run_experiment,
+                 inputs=[api_key_input, distribution_input],
+                 outputs=[status_output, summary_output, results_output]
+             )
+
+         with gr.Tab("📊 Analyze results"):
+             analyze_btn = gr.Button("Start analysis", variant="primary")
+
+             analyze_status = gr.Textbox(label="Status")
+
+             with gr.Row():
+                 time_plot = gr.Image(label="Response time comparison")
+                 token_plot = gr.Image(label="Token usage comparison")
+
+             with gr.Row():
+                 rag_plot = gr.Image(label="RAG usage pattern")
+                 overfitting_plot = gr.Image(label="Overfitting analysis")
+
+             analyze_btn.click(
+                 fn=app.analyze_results,
+                 outputs=[
+                     analyze_status,
+                     time_plot,
+                     token_plot,
+                     rag_plot,
+                     overfitting_plot
+                 ]
+             )
+
+         with gr.Tab("ℹ️ Info"):
+             gr.Markdown("""
+             ## 📋 Models compared
+
+             | Model | Description |
+             |------|------|
+             | QLoRA + RAG | Existing service (QLoRA fine-tuning + RAG) |
+             | QLoRA only | RAG removed (QLoRA alone) |
+             | Base + RAG | PEFT removed (base model + RAG) |
+
+             ## 📊 Metrics
+
+             - **Overfitting**: In-Distribution vs Out-Distribution performance gap
+             - **Answer speed**: average response time
+             - **Token usage**: average token consumption
+             - **RAG usage pattern**: how often retrieval is used
+
+             ## ⏱️ Expected duration
+
+             - Model loading: 3-5 min
+             - Experiment run: 10-20 min (25 questions)
+             - Result analysis: 1-2 min
+             """)
+
+     return demo
+
+
+ if __name__ == "__main__":
+     demo = create_interface()
+     # Serve on 0.0.0.0:7860, the port the Docker Space exposes
+     # (share=True would open a Gradio tunnel instead of the exposed port).
+     demo.launch(server_name="0.0.0.0", server_port=7860)
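Because `ExperimentApp` keeps the experiment logic separate from the Gradio wiring, it can also be exercised without the UI. A minimal headless smoke test, assuming the `src` package imports cleanly and a valid key is supplied (the `sk-...` value below is a placeholder):

```python
# Headless smoke test for the Space app; mirrors what the "Run experiment"
# button does, minus Gradio. "sk-..." is a placeholder, not a real key.
from app import ExperimentApp

app = ExperimentApp()
print(app.setup_environment("sk-..."))  # only sets environment variables

# Pass a no-op progress callback so gr.Progress is not needed outside Gradio.
status, summary, df = app.run_experiment("sk-...", "In", progress=lambda *a, **k: None)
print(status)
if df is not None:
    print(df.head())
```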
data/eval_dataset.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "metadata": {
+     "version": "1.0",
+     "description": "RFPilot 평가 데이터셋",
+     "created_by": "manual_annotation"
+   },
+   "in_distribution": [
+     {
+       "query": "사업 제안서 제출 마감일은 언제인가요?",
+       "expected_answer": "2024년 3월 15일까지입니다.",
+       "category": "deadline",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_001.hwp",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     },
+     {
+       "query": "제안 요청서의 제출 서류는 무엇인가요?",
+       "expected_answer": "기술제안서, 가격제안서, 사업자등록증, 회사소개서가 필요합니다.",
+       "category": "requirements",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_001.hwp",
+       "metadata": {
+         "difficulty": "medium"
+       }
+     },
+     {
+       "query": "사업 예산 규모는 얼마인가요?",
+       "expected_answer": "총 5억원입니다.",
+       "category": "budget",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_002.hwp",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     }
+   ],
+   "out_distribution": [
+     {
+       "query": "한국의 수도는 어디인가요?",
+       "expected_answer": "서울입니다.",
+       "category": "general_knowledge",
+       "expected_type": "out_of_scope",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     },
+     {
+       "query": "파이썬에서 리스트와 튜플의 차이는 무엇인가요?",
+       "expected_answer": "리스트는 가변(mutable)이고, 튜플은 불변(immutable)입니다.",
+       "category": "programming",
+       "expected_type": "out_of_scope",
+       "metadata": {
+         "difficulty": "medium"
+       }
+     }
+   ]
+ }
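The dataset ships ground-truth `expected_answer` strings, while the comparison script further down records only latency, token, and retrieval statistics. A sketch of a simple answer-quality check that could be layered on top (the substring criterion and the `score_answer` helper are illustrative assumptions, not part of the repo):

```python
import json

def score_answer(expected: str, answer: str) -> bool:
    # Crude exact-substring criterion; real grading would need normalization
    # or an LLM judge. Illustrative only.
    return expected.strip() in (answer or "")

with open("data/eval_dataset.json", encoding="utf-8") as f:
    data = json.load(f)

for split in ("in_distribution", "out_distribution"):
    for sample in data[split]:
        model_answer = "..."  # would come from a pipeline's generate_answer()
        print(split, sample["category"],
              score_answer(sample["expected_answer"], model_answer))
```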
data/eval_template.csv ADDED
@@ -0,0 +1,3 @@
+ distribution,query,expected_answer,category,source_doc,metadata
+ in_distribution,사업 제안서 제출 마감일은 언제인가요?,2024년 3월 15일까지입니다.,deadline,RFP_2024_001.hwp,"{""difficulty"": ""easy""}"
+ out_distribution,한국의 수도는 어디인가요?,서울입니다.,general_knowledge,,"{""difficulty"": ""easy""}"
requirements.txt ADDED
@@ -0,0 +1,29 @@
+ # Core
+ python-dotenv>=1.0.0
+ openai>=1.0.0
+
+ # LLM & RAG
+ llama-cpp-python>=0.2.90
+ chromadb>=0.4.0
+ sentence-transformers>=2.2.0
+ rank-bm25>=0.2.2
+ huggingface-hub>=0.19.0
+
+ # Document processing
+ pypdf>=3.17.0
+ python-docx>=1.1.0
+ olefile>=0.47
+
+ # Data processing
+ pandas>=2.0.0
+ numpy>=1.24.0
+
+ # Visualization
+ matplotlib>=3.7.0
+ seaborn>=0.12.0
+
+ # Utilities
+ tqdm>=4.65.0
+
+ # Gradio
+ gradio>=6.0.0
src/analyze_results.py ADDED
File without changes
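`src/analyze_results.py` is committed empty, yet `app.py` imports `ResultAnalyzer` from it and calls four plotting methods that are expected to drop PNGs into an `analysis/` directory next to the result file. A minimal sketch of the interface those call sites imply; only the method names, the constructor argument, and the output filenames are taken from `app.py`, while the chart choices are assumptions:

```python
import json
from pathlib import Path

import matplotlib
matplotlib.use("Agg")  # render headlessly inside the Space
import matplotlib.pyplot as plt


class ResultAnalyzer:
    """Load a results_*.json file and save comparison plots as PNGs."""

    def __init__(self, result_file: str):
        self.result_file = Path(result_file)
        with open(self.result_file, encoding="utf-8") as f:
            self.results = json.load(f)
        # app.py looks for the PNGs in an "analysis" dir next to the JSON.
        self.analysis_dir = self.result_file.parent / "analysis"
        self.analysis_dir.mkdir(parents=True, exist_ok=True)

    def _avg_by_model(self, value_fn):
        """Average value_fn(record) per model over all successful records."""
        stats = {}
        for dist_results in self.results["results"].values():
            for r in dist_results:
                if r["success"]:
                    stats.setdefault(r["model"], []).append(value_fn(r))
        return {m: sum(v) / len(v) for m, v in stats.items() if v}

    def _bar(self, data, title, filename):
        fig, ax = plt.subplots()
        ax.bar(list(data.keys()), list(data.values()))
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(self.analysis_dir / filename)
        plt.close(fig)

    def plot_time_comparison(self):
        self._bar(self._avg_by_model(lambda r: r["elapsed_time"]),
                  "Average response time (s)", "time_comparison.png")

    def plot_token_comparison(self):
        self._bar(self._avg_by_model(lambda r: r.get("usage", {}).get("total_tokens", 0)),
                  "Average total tokens", "token_comparison.png")

    def plot_rag_usage(self):
        self._bar(self._avg_by_model(lambda r: 1.0 if r["used_retrieval"] else 0.0),
                  "RAG usage rate", "rag_usage.png")

    def plot_overfitting_analysis(self):
        # Success rate per model, in- vs out-of-distribution.
        counts = {}
        for dist, dist_results in self.results["results"].items():
            for r in dist_results:
                ok, total = counts.get((r["model"], dist), (0, 0))
                counts[(r["model"], dist)] = (ok + int(r["success"]), total + 1)
        models = sorted({m for m, _ in counts})
        fig, ax = plt.subplots()
        for dist in ("in_distribution", "out_distribution"):
            rates = [counts.get((m, dist), (0, 1))[0] / counts.get((m, dist), (0, 1))[1]
                     for m in models]
            ax.plot(models, rates, marker="o", label=dist)
        ax.set_title("Success rate: in vs out of distribution")
        ax.set_ylim(0, 1.05)
        ax.legend()
        fig.tight_layout()
        fig.savefig(self.analysis_dir / "overfitting_analysis.png")
        plt.close(fig)
```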
src/compare_models.py ADDED
@@ -0,0 +1,325 @@
+ """
+ Three-way model comparison experiment.
+
+ Models compared:
+ 1. QLoRA + RAG (the existing service)
+ 2. QLoRA only (RAG removed)
+ 3. Base + RAG (PEFT removed)
+
+ Metrics:
+ - Overfitting (In-Distribution vs Out-Distribution)
+ - Answer latency (elapsed_time, retrieval_time, generation_time)
+ - Token counts (total_tokens, prompt_tokens, completion_tokens)
+ """
+
+ import os
+ import sys
+ import time
+ import json
+ import logging
+ from typing import Dict, List, Any
+ from datetime import datetime
+ from pathlib import Path
+
+ # Put the project root on the import path
+ project_root = Path(__file__).parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.utils.config import RAGConfig
+ from src.eval_dataset import EvalDataset
+
+ # Logging setup
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class ModelComparison:
+     """Runs the model comparison experiment."""
+
+     def __init__(self, config=None, output_dir: str = "./results"):
+         """Initialize paths, dataset, and pipeline registry."""
+         self.config = config or RAGConfig()
+         self.output_dir = Path(output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Timestamp for output file names
+         self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+         # Evaluation dataset
+         self.dataset = EvalDataset()
+
+         # Model pipelines
+         self.pipelines = {}
+
+         logger.info("✅ ModelComparison initialized")
+         logger.info(f"   Results directory: {self.output_dir}")
+
+     def load_models(self):
+         """Load the three pipelines."""
+         logger.info("\n" + "="*60)
+         logger.info("Loading models")
+         logger.info("="*60)
+
+         try:
+             # 1. QLoRA + RAG (existing service)
+             logger.info("\n[1/3] Loading QLoRA + RAG...")
+             from src.generator.generator_gguf import GGUFRAGPipeline
+             self.pipelines['qlora_rag'] = GGUFRAGPipeline(config=self.config)
+             logger.info("✅ QLoRA + RAG loaded")
+
+             # 2. QLoRA only (RAG removed)
+             logger.info("\n[2/3] Loading QLoRA only...")
+             from src.generator.generator_gguf_no_rag import GGUFNoRAGPipeline
+             self.pipelines['qlora_only'] = GGUFNoRAGPipeline(config=self.config)
+             logger.info("✅ QLoRA only loaded")
+
+             # 3. Base + RAG (PEFT removed)
+             logger.info("\n[3/3] Loading Base + RAG...")
+             from src.generator.generator_gguf_base import GGUFBaseRAGPipeline
+             self.pipelines['base_rag'] = GGUFBaseRAGPipeline(config=self.config)
+             logger.info("✅ Base + RAG loaded")
+
+             logger.info("\n" + "="*60)
+             logger.info(f"✅ Loaded {len(self.pipelines)} models")
+             logger.info("="*60 + "\n")
+
+         except Exception as e:
+             logger.error(f"❌ Model loading failed: {e}")
+             import traceback
+             traceback.print_exc()
+             raise
+
+     def run_single_query(
+         self,
+         model_name: str,
+         query: str,
+         query_info: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Run one model on one query."""
+         pipeline = self.pipelines[model_name]
+
+         try:
+             start_time = time.time()
+             result = pipeline.generate_answer(query)
+             total_time = time.time() - start_time
+
+             # Normalize the result record
+             return {
+                 'model': model_name,
+                 'query': query,
+                 'category': query_info.get('category', 'unknown'),
+                 'expected_type': query_info.get('expected_type', 'unknown'),
+                 'answer': result['answer'],
+                 'used_retrieval': result.get('used_retrieval', False),
+                 'query_type': result.get('query_type', 'unknown'),
+                 'search_mode': result.get('search_mode', 'none'),
+                 'elapsed_time': total_time,
+                 'model_elapsed_time': result.get('elapsed_time', 0),
+                 'usage': result.get('usage', {}),
+                 'sources_count': len(result.get('sources', [])),
+                 'success': True,
+                 'error': None
+             }
+
+         except Exception as e:
+             logger.error(f"❌ Query failed [{model_name}]: {e}")
+             return {
+                 'model': model_name,
+                 'query': query,
+                 'category': query_info.get('category', 'unknown'),
+                 'expected_type': query_info.get('expected_type', 'unknown'),
+                 'answer': None,
+                 'used_retrieval': False,
+                 'query_type': 'error',
+                 'search_mode': 'none',
+                 'elapsed_time': 0,
+                 'model_elapsed_time': 0,
+                 'usage': {},
+                 'sources_count': 0,
+                 'success': False,
+                 'error': str(e)
+             }
+
+     def run_experiment(
+         self,
+         distribution: str = 'all',
+         save_results: bool = True
+     ) -> Dict[str, List[Dict[str, Any]]]:
+         """
+         Run the experiment.
+
+         Args:
+             distribution: 'in', 'out', or 'all'
+             save_results: whether to persist the results
+         """
+         logger.info("\n" + "="*60)
+         logger.info("Starting experiment")
+         logger.info("="*60)
+
+         # Prepare the dataset
+         if distribution == 'in':
+             queries_dict = {'in_distribution': self.dataset.get_in_distribution()}
+         elif distribution == 'out':
+             queries_dict = {'out_distribution': self.dataset.get_out_distribution()}
+         else:  # 'all'
+             queries_dict = self.dataset.get_all_queries()
+
+         # Result container
+         all_results = {
+             'metadata': {
+                 'timestamp': self.timestamp,
+                 'distribution': distribution,
+                 'models': list(self.pipelines.keys()),
+                 'total_queries': sum(len(v) for v in queries_dict.values())
+             },
+             'results': {}
+         }
+
+         # Run each distribution
+         for dist_type, queries in queries_dict.items():
+             logger.info(f"\n{'='*60}")
+             logger.info(f"{dist_type.upper()} run ({len(queries)} queries)")
+             logger.info(f"{'='*60}")
+
+             dist_results = []
+
+             # For each query
+             for i, query_info in enumerate(queries, 1):
+                 query = query_info['query']
+                 logger.info(f"\n[{i}/{len(queries)}] Query: {query}")
+
+                 # For each model
+                 for model_name in self.pipelines.keys():
+                     logger.info(f"  → running {model_name}...")
+
+                     result = self.run_single_query(model_name, query, query_info)
+                     dist_results.append(result)
+
+                     if result['success']:
+                         logger.info(f"  ✅ done ({result['elapsed_time']:.2f}s)")
+                     else:
+                         logger.warning(f"  ❌ failed: {result['error']}")
+
+             all_results['results'][dist_type] = dist_results
+
+         # Persist the results
+         if save_results:
+             self._save_results(all_results)
+
+         logger.info("\n" + "="*60)
+         logger.info("✅ Experiment finished")
+         logger.info("="*60 + "\n")
+
+         return all_results
+
+     def _save_results(self, results: Dict[str, Any]):
+         """Persist the results as JSON plus a text summary."""
+         # Save as JSON
+         output_file = self.output_dir / f"results_{self.timestamp}.json"
+
+         with open(output_file, 'w', encoding='utf-8') as f:
+             json.dump(results, f, ensure_ascii=False, indent=2)
+
+         logger.info(f"📁 Results saved: {output_file}")
+
+         # Save the summary statistics
+         summary_file = self.output_dir / f"summary_{self.timestamp}.txt"
+         self._save_summary(results, summary_file)
+
+         logger.info(f"📊 Summary saved: {summary_file}")
+
+     def _save_summary(self, results: Dict[str, Any], output_file: Path):
+         """Write per-model summary statistics."""
+         with open(output_file, 'w', encoding='utf-8') as f:
+             f.write("="*60 + "\n")
+             f.write("Experiment result summary\n")
+             f.write("="*60 + "\n\n")
+
+             # Metadata
+             metadata = results['metadata']
+             f.write(f"Timestamp: {metadata['timestamp']}\n")
+             f.write(f"Distribution: {metadata['distribution']}\n")
+             f.write(f"Models: {', '.join(metadata['models'])}\n")
+             f.write(f"Total queries: {metadata['total_queries']}\n\n")
+
+             # Per-distribution statistics
+             for dist_type, dist_results in results['results'].items():
+                 f.write(f"\n{'='*60}\n")
+                 f.write(f"{dist_type.upper()} results\n")
+                 f.write(f"{'='*60}\n\n")
+
+                 # Group by model
+                 model_stats = {}
+                 for result in dist_results:
+                     model = result['model']
+                     if model not in model_stats:
+                         model_stats[model] = []
+                     model_stats[model].append(result)
+
+                 # Per-model statistics
+                 for model, model_results in model_stats.items():
+                     f.write(f"\n[{model}]\n")
+
+                     # Success / failure
+                     success_count = sum(1 for r in model_results if r['success'])
+                     f.write(f"  Success: {success_count}/{len(model_results)}\n")
+
+                     # Average latency
+                     avg_time = sum(r['elapsed_time'] for r in model_results if r['success']) / max(success_count, 1)
+                     f.write(f"  Avg time: {avg_time:.3f}s\n")
+
+                     # Average tokens
+                     total_tokens = sum(r['usage'].get('total_tokens', 0) for r in model_results if r['success'])
+                     avg_tokens = total_tokens / max(success_count, 1)
+                     f.write(f"  Avg tokens: {avg_tokens:.1f}\n")
+
+                     # RAG usage rate
+                     rag_count = sum(1 for r in model_results if r['used_retrieval'])
+                     f.write(f"  RAG used: {rag_count}/{len(model_results)} ({rag_count/len(model_results)*100:.1f}%)\n")
+
+
+ def main():
+     """Entry point."""
+     logger.info("="*60)
+     logger.info("RFPilot model comparison experiment")
+     logger.info("="*60)
+
+     # Load the config
+     config = RAGConfig()
+
+     # Initialize the experiment
+     experiment = ModelComparison(config=config, output_dir="./experiments/results")
+
+     # Inspect the dataset
+     experiment.dataset.print_summary()
+     experiment.dataset.print_samples(n=3)
+
+     # Load the models
+     experiment.load_models()
+
+     # Run the experiment
+     # Option 1: the full experiment
+     results = experiment.run_experiment(distribution='all', save_results=True)
+
+     # Option 2: In-Distribution only
+     # results = experiment.run_experiment(distribution='in', save_results=True)
+
+     # Option 3: Out-Distribution only
+     # results = experiment.run_experiment(distribution='out', save_results=True)
+
+     logger.info("\n✅ All experiments finished!")
+     logger.info(f"   Results saved under: {experiment.output_dir}")
+
+
+ if __name__ == "__main__":
+     try:
+         main()
+     except KeyboardInterrupt:
+         logger.info("\n⚠️ Interrupted by user")
+     except Exception as e:
+         logger.error(f"\n❌ Experiment failed: {e}")
+         import traceback
+         traceback.print_exc()
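Since the saved `results_*.json` keeps the two splits side by side, the overfitting signal named in the module docstring can be read off directly. A sketch (locating the newest file the same way `app.py` does; the gap comparison itself is an assumption about how the results would be interpreted):

```python
import json
from pathlib import Path

# Pick the newest result file, the same way app.py locates it.
latest = sorted(Path("experiments/results").glob("results_*.json"))[-1]
results = json.loads(latest.read_text(encoding="utf-8"))["results"]

models = sorted({r["model"] for rs in results.values() for r in rs})
for model in models:
    for dist, rs in results.items():
        mine = [r for r in rs if r["model"] == model]
        ok = [r for r in mine if r["success"]]
        rate = len(ok) / max(len(mine), 1)
        avg_t = sum(r["elapsed_time"] for r in ok) / max(len(ok), 1)
        # A large in- vs out-of-distribution gap for a model hints at overfitting.
        print(f"{model:12s} {dist:18s} success={rate:.2f} avg_time={avg_t:.2f}s")
```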
src/create_eval_dataset.py ADDED
@@ -0,0 +1,247 @@
+ """
+ Evaluation dataset builder.
+
+ Builds an evaluation dataset with ground truth by pairing questions
+ and answers drawn from real RFP documents.
+
+ Usage:
+     python create_eval_dataset.py --input data/rag_chunks_final.csv --output data/eval_dataset.json
+ """
+
+ import json
+ import csv
+ import argparse
+ from pathlib import Path
+ from typing import List, Dict, Any
+
+
+ class EvalDatasetCreator:
+     """Builds the evaluation dataset."""
+
+     def __init__(self):
+         self.dataset = {
+             "metadata": {
+                 "version": "1.0",
+                 "description": "RFPilot 평가 데이터셋",
+                 "created_by": "manual_annotation"
+             },
+             "in_distribution": [],
+             "out_distribution": []
+         }
+
+     def add_in_distribution_sample(
+         self,
+         query: str,
+         expected_answer: str,
+         category: str,
+         source_doc: str = None,
+         metadata: Dict[str, Any] = None
+     ):
+         """Add an in-distribution sample."""
+         sample = {
+             "query": query,
+             "expected_answer": expected_answer,
+             "category": category,
+             "expected_type": "document",
+             "source_doc": source_doc,
+             "metadata": metadata or {}
+         }
+         self.dataset["in_distribution"].append(sample)
+
+     def add_out_distribution_sample(
+         self,
+         query: str,
+         expected_answer: str,
+         category: str,
+         metadata: Dict[str, Any] = None
+     ):
+         """Add an out-of-distribution sample."""
+         sample = {
+             "query": query,
+             "expected_answer": expected_answer,
+             "category": category,
+             "expected_type": "out_of_scope",
+             "metadata": metadata or {}
+         }
+         self.dataset["out_distribution"].append(sample)
+
+     def create_template_dataset(self):
+         """Create a template dataset (for manual annotation)."""
+         print("📝 Creating template dataset...")
+
+         # In-distribution templates
+         in_dist_templates = [
+             {
+                 "query": "사업 제안서 제출 마감일은 언제인가요?",
+                 "expected_answer": "2024년 3월 15일까지입니다.",  # extracted from the actual document
+                 "category": "deadline",
+                 "source_doc": "RFP_2024_001.hwp",
+                 "metadata": {"difficulty": "easy"}
+             },
+             {
+                 "query": "제안 요청서의 제출 서류는 무엇인가요?",
+                 "expected_answer": "기술제안서, 가격제안서, 사업자등록증, 회사소개서가 필요합니다.",
+                 "category": "requirements",
+                 "source_doc": "RFP_2024_001.hwp",
+                 "metadata": {"difficulty": "medium"}
+             },
+             {
+                 "query": "사업 예산 규모는 얼마인가요?",
+                 "expected_answer": "총 5억원입니다.",
+                 "category": "budget",
+                 "source_doc": "RFP_2024_002.hwp",
+                 "metadata": {"difficulty": "easy"}
+             },
+         ]
+
+         # Out-of-distribution templates
+         out_dist_templates = [
+             {
+                 "query": "한국의 수도는 어디인가요?",
+                 "expected_answer": "서울입니다.",
+                 "category": "general_knowledge",
+                 "metadata": {"difficulty": "easy"}
+             },
+             {
+                 "query": "파이썬에서 리스트와 튜플의 차이는 무엇인가요?",
+                 "expected_answer": "리스트는 가변(mutable)이고, 튜플은 불변(immutable)입니다.",
+                 "category": "programming",
+                 "metadata": {"difficulty": "medium"}
+             },
+         ]
+
+         # Add them to the dataset
+         for sample in in_dist_templates:
+             self.add_in_distribution_sample(**sample)
+
+         for sample in out_dist_templates:
+             self.add_out_distribution_sample(**sample)
+
+         print("✅ Template created")
+         print(f"   - In-Distribution: {len(in_dist_templates)} samples")
+         print(f"   - Out-Distribution: {len(out_dist_templates)} samples")
+         print("\n⚠️ Edit this template and fill in real data!")
+
+     def load_from_csv(self, csv_path: str):
+         """Load the dataset from a CSV file."""
+         print(f"📥 Loading CSV: {csv_path}")
+
+         with open(csv_path, 'r', encoding='utf-8') as f:
+             reader = csv.DictReader(f)
+             for row in reader:
+                 distribution = row.get('distribution', 'in_distribution')
+
+                 if distribution == 'in_distribution':
+                     self.add_in_distribution_sample(
+                         query=row['query'],
+                         expected_answer=row['expected_answer'],
+                         category=row['category'],
+                         source_doc=row.get('source_doc'),
+                         metadata=json.loads(row.get('metadata', '{}'))
+                     )
+                 else:
+                     self.add_out_distribution_sample(
+                         query=row['query'],
+                         expected_answer=row['expected_answer'],
+                         category=row['category'],
+                         metadata=json.loads(row.get('metadata', '{}'))
+                     )
+
+         print("✅ CSV loaded")
+
+     def save_json(self, output_path: str):
+         """Save as JSON."""
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'w', encoding='utf-8') as f:
+             json.dump(self.dataset, f, ensure_ascii=False, indent=2)
+
+         print(f"💾 Saved: {output_path}")
+
+     def save_csv_template(self, output_path: str):
+         """Save a CSV template for manual annotation."""
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'w', encoding='utf-8', newline='') as f:
+             writer = csv.DictWriter(f, fieldnames=[
+                 'distribution', 'query', 'expected_answer',
+                 'category', 'source_doc', 'metadata'
+             ])
+             writer.writeheader()
+
+             # In-distribution example
+             writer.writerow({
+                 'distribution': 'in_distribution',
+                 'query': '사업 제안서 제출 마감일은 언제인가요?',
+                 'expected_answer': '2024년 3월 15일까지입니다.',
+                 'category': 'deadline',
+                 'source_doc': 'RFP_2024_001.hwp',
+                 'metadata': '{"difficulty": "easy"}'
+             })
+
+             # Out-of-distribution example
+             writer.writerow({
+                 'distribution': 'out_distribution',
+                 'query': '한국의 수도는 어디인가요?',
+                 'expected_answer': '서울입니다.',
+                 'category': 'general_knowledge',
+                 'source_doc': '',
+                 'metadata': '{"difficulty": "easy"}'
+             })
+
+         print(f"📄 CSV template saved: {output_path}")
+         print("   → edit this file and fill in real data!")
+
+     def print_summary(self):
+         """Print a dataset summary."""
+         print("\n" + "="*60)
+         print("Dataset summary")
+         print("="*60)
+         print(f"In-Distribution: {len(self.dataset['in_distribution'])} samples")
+         print(f"Out-Distribution: {len(self.dataset['out_distribution'])} samples")
+         print(f"Total samples: {len(self.dataset['in_distribution']) + len(self.dataset['out_distribution'])}")
+         print("="*60 + "\n")
+
+
+ def main():
+     parser = argparse.ArgumentParser(description='Build the evaluation dataset')
+     parser.add_argument('--mode', choices=['template', 'csv'], default='template',
+                         help='mode: template (create a template) or csv (load from CSV)')
+     parser.add_argument('--input', type=str, help='input CSV file path')
+     parser.add_argument('--output', type=str, default='data/eval_dataset.json',
+                         help='output JSON file path')
+     parser.add_argument('--csv-template', type=str, default='data/eval_template.csv',
+                         help='where to save the CSV template')
+
+     args = parser.parse_args()
+
+     creator = EvalDatasetCreator()
+
+     if args.mode == 'template':
+         print("📝 Template mode")
+         creator.create_template_dataset()
+         creator.save_json(args.output)
+         creator.save_csv_template(args.csv_template)
+
+     elif args.mode == 'csv':
+         if not args.input:
+             print("❌ CSV mode requires the --input option.")
+             return
+
+         print("📥 CSV mode")
+         creator.load_from_csv(args.input)
+         creator.save_json(args.output)
+
+     creator.print_summary()
+
+     print("\n✅ Done!")
+     print("\nNext steps:")
+     print(f"1. Open {args.csv_template} and fill in real data")
+     print(f"2. python create_eval_dataset.py --mode csv --input {args.csv_template} --output {args.output}")
+     print(f"3. Use the generated {args.output} in the experiment")
+
+
+ if __name__ == "__main__":
+     main()
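The same flow is available programmatically; a short sketch mirroring the CLI's template mode, assuming it is run from the project root (the relative `data/` paths depend on that):

```python
from src.create_eval_dataset import EvalDatasetCreator

creator = EvalDatasetCreator()
creator.create_template_dataset()                    # fill in the built-in examples
creator.save_json("data/eval_dataset.json")          # what the experiment reads
creator.save_csv_template("data/eval_template.csv")  # for manual annotation
creator.print_summary()
```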
src/eval_dataset.py ADDED
@@ -0,0 +1,33 @@
+ import json
+ from pathlib import Path
+ from typing import Dict, List, Any
+
+ class EvalDataset:
+     def __init__(self, dataset_path: str = "data/eval_dataset.json"):
+         self.dataset_path = Path(dataset_path)
+         self.data = self._load_dataset()
+
+     def _load_dataset(self) -> Dict[str, Any]:
+         with open(self.dataset_path, 'r', encoding='utf-8') as f:
+             return json.load(f)
+
+     def get_in_distribution(self) -> List[Dict[str, Any]]:
+         return self.data.get('in_distribution', [])
+
+     def get_out_distribution(self) -> List[Dict[str, Any]]:
+         return self.data.get('out_distribution', [])
+
+     def get_all_queries(self) -> Dict[str, List[Dict[str, Any]]]:
+         return {
+             'in_distribution': self.get_in_distribution(),
+             'out_distribution': self.get_out_distribution()
+         }
+
+     def print_summary(self):
+         print(f"In-Distribution: {len(self.get_in_distribution())} samples")
+         print(f"Out-Distribution: {len(self.get_out_distribution())} samples")
+
+     def print_samples(self, n: int = 3):
+         print("\n[In-Distribution samples]")
+         for item in self.get_in_distribution()[:n]:
+             print(f"  - {item['query']}")
src/generator/generator_gguf.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """
        Initialize the generator.

        Args:
            model_path: Path to the GGUF model file
            n_gpu_layers: Layers to offload to the GPU (0 = CPU only, 35 = fully on GPU)
            n_ctx: Maximum context length
            n_threads: Number of CPU threads
            max_new_tokens: Maximum number of generated tokens
            temperature: Sampling temperature (0.0-1.0)
            top_p: Nucleus sampling parameter
            system_prompt: System prompt (kept in Korean; the model answers in Korean)
        """
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized")

    def load_model(self) -> None:
        """
        Load the GGUF model.

        Logic:
        1. Check USE_MODEL_HUB
        2-A. True  -> download from the Hugging Face Hub
        2-B. False -> use a local file
        3. Load the model
        """

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config (defaults to True)
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                model_hub_repo = getattr(self.config, 'MODEL_HUB_REPO', 'beomi/Llama-3-Open-Ko-8B-gguf')
                model_hub_filename = getattr(self.config, 'MODEL_HUB_FILENAME', 'ggml-model-Q4_K_M.gguf')
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading from the Model Hub: {model_hub_repo}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=model_hub_repo,
                    filename=model_hub_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False  # copy the file instead of symlinking
                )

                logger.info(f"โœ… Download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path  # path passed to the constructor

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,  # enable debug logging
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… GGUF model loaded!")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")
                logger.warning("   Possibly out of memory. Try lowering n_gpu_layers.")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Simple prompt formatting for the GGUF model.

        Uses a plain-text Korean template instead of Llama-3 special tokens.
        """
        # Pick the system prompt
        if system_prompt is None:
            system_prompt = self.system_prompt

        # Include context if provided
        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        # Simple Korean template (no special tokens)
        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt
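
    # --- Illustration (editor's sketch, not in the original file; values are hypothetical) ---
    # format_prompt("์‚ฌ์—… ๊ธฐ๊ฐ„์€?", context=None) returns roughly:
    #
    #     ### ์‹œ์Šคํ…œ
    #     ๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.
    #
    #     ### ์‚ฌ์šฉ์ž
    #     ์‚ฌ์—… ๊ธฐ๊ฐ„์€?
    #
    #     ### ๋‹ต๋ณ€
    #
    # The trailing "### ๋‹ต๋ณ€" header acts as the generation anchor, and "###"
    # appears in the stop list of generate() below, so decoding halts before
    # the model can open a new section.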

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """
        Generate a response for a formatted prompt.

        Args:
            prompt: Formatted prompt
            max_new_tokens: Maximum number of generated tokens
            temperature: Sampling temperature
            top_p: Nucleus sampling

        Returns:
            Generated response text

        Raises:
            RuntimeError: If the model is not loaded
        """
        # Make sure the model is loaded
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        # Resolve parameters
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            # Generate
            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,  # do not echo the prompt
                stop=[
                    # Section delimiters
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    # Block meta text
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    # Block question patterns (prevents generating questions after the answer)
                    "\n\n",  # paragraph break
                    "?",     # question mark
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"  # Korean interrogative endings
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            # Extract the response
            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """
        Generate a response to a question (convenience method).

        Args:
            question: User question
            context: Optional context
            system_prompt: Optional system prompt
            **kwargs: Extra parameters forwarded to generate()

        Returns:
            Generated response
        """
        # Format the prompt
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        # Generate the response
        response = self.generate(prompt, **kwargs)

        return response


class GGUFRAGPipeline:
    """
    GGUF generator + RAG pipeline.

    Provides an interface compatible with chatbot_app.py.
    """

    def __init__(
        self,
        config=None,
        model: str = None,  # for compatibility (unused)
        top_k: int = None,
        # GPU settings (optional, override the config)
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None,
        search_mode: str = None,
        alpha: float = None
    ):
        """
        Initialize.

        Args:
            config: RAGConfig object
            model: Model name (unused, for compatibility)
            top_k: Default number of documents to retrieve
            n_gpu_layers: Number of GPU layers (config override)
            n_ctx: Context length (config override)
            n_threads: Number of CPU threads (config override)
            max_new_tokens: Maximum generated tokens (config override)
            temperature: Sampling temperature (config override)
            top_p: Nucleus sampling (config override)
            search_mode: Retrieval mode
            alpha: Embedding weight
        """
        self.config = config or RAGConfig()

        # Defaults from the config (with fallbacks)
        self.top_k = top_k or getattr(self.config, 'DEFAULT_TOP_K', 10)

        # Retrieval settings
        self.search_mode = search_mode or getattr(self.config, 'DEFAULT_SEARCH_MODE', 'hybrid_rerank')
        self.alpha = alpha if alpha is not None else getattr(self.config, 'DEFAULT_ALPHA', 0.5)

        # Initialize the retriever (uses RAGRetriever)
        logger.info("Initializing RAGRetriever...")
        from src.retriever.retriever import RAGRetriever
        self.retriever = RAGRetriever(config=self.config)

        # GGUF settings (explicit parameters override the config, which overrides defaults)
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path (fallback)
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt (fallback)
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator...")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")
        logger.info(f"   Threads: {gguf_n_threads}")
        logger.info(f"   Model path: {gguf_model_path}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model (takes a while)
        logger.info("Loading GGUF model...")
        self.generator.load_model()

        # Chat history
        self.chat_history: List[Dict] = []

        # Last retrieval results (used to return sources)
        self._last_retrieved_docs = []

        logger.info("โœ… GGUFRAGPipeline initialized")
        logger.info(f"   - search mode: {self.search_mode}")
        logger.info(f"   - default top_k: {self.top_k}")

    def _retrieve_and_format(self, query: str) -> str:
        """Run retrieval and format the context."""
        # Retrieve documents according to the search mode (RAGRetriever methods)
        if self.search_mode == "embedding":
            docs = self.retriever.search(query, top_k=self.top_k)
        elif self.search_mode == "embedding_rerank":
            docs = self.retriever.search_with_rerank(query, top_k=self.top_k)
        elif self.search_mode == "hybrid":
            docs = self.retriever.hybrid_search(
                query, top_k=self.top_k, alpha=self.alpha
            )
        elif self.search_mode == "hybrid_rerank":
            docs = self.retriever.hybrid_search_with_rerank(
                query, top_k=self.top_k, alpha=self.alpha
            )
        else:
            docs = self.retriever.search(query, top_k=self.top_k)

        # Remember the last retrieval results
        self._last_retrieved_docs = docs

        # Format the context
        return self._format_context(docs)

    def _format_context(self, retrieved_docs: list) -> str:
        """
        Convert retrieved documents into a context string.

        Truncates automatically if the context grows too long (token-limit guard).
        """
        if not retrieved_docs:
            return "๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        context_parts = []
        max_context_chars = 8000  # roughly 2000 tokens, with headroom

        current_length = 0
        for i, doc in enumerate(retrieved_docs, 1):
            doc_text = f"[๋ฌธ์„œ {i}]\n{doc['content']}\n"
            doc_length = len(doc_text)

            # Check the context length
            if current_length + doc_length > max_context_chars:
                logger.warning(f"โš ๏ธ Context length limit: using only {i-1} documents (max {max_context_chars} chars)")
                break

            context_parts.append(doc_text)
            current_length += doc_length

        return "\n".join(context_parts)
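
    # Worked example (editor's illustration, hypothetical numbers): with
    # max_context_chars=8000 and three retrieved chunks of ~3000 characters
    # each, the third chunk would push the total to ~9000, so only the first
    # two are kept. At the roughly 4 chars/token implied by the comment above,
    # this leaves headroom inside the model's n_ctx for the system prompt,
    # the question, and the generated answer.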

    def _format_sources(self, retrieved_docs: list) -> list:
        """Convert retrieved documents into the sources format."""
        sources = []
        for doc in retrieved_docs:
            source_info = {
                'content': doc['content'],
                'metadata': doc['metadata'],
                'filename': doc.get('filename', 'N/A'),
                'organization': doc.get('organization', 'N/A')
            }

            # The score field differs by search mode
            if 'rerank_score' in doc:
                source_info['score'] = doc['rerank_score']
                source_info['score_type'] = 'rerank'
            elif 'hybrid_score' in doc:
                source_info['score'] = doc['hybrid_score']
                source_info['score_type'] = 'hybrid'
            elif 'relevance_score' in doc:
                source_info['score'] = doc['relevance_score']
                source_info['score_type'] = 'embedding'
            else:
                source_info['score'] = 0
                source_info['score_type'] = 'unknown'

            sources.append(source_info)

        return sources

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        # Rough word-count-based estimate
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """
        Generate an answer (main method, compatible with chatbot_app.py).

        Args:
            query: Question
            top_k: Number of documents to retrieve
            search_mode: Retrieval mode
            alpha: Embedding weight

        Returns:
            dict: answer, sources, search_mode, usage, elapsed_time, used_retrieval
        """
        try:
            start_time = time.time()

            # Apply parameters (before retrieval runs)
            if top_k is not None:
                self.top_k = top_k
            if search_mode is not None:
                self.search_mode = search_mode
            if alpha is not None:
                self.alpha = alpha

            # ===== 1. Decide whether to retrieve, via the router =====
            # (a fresh router per call; the base/no-RAG variants cache it on self)
            router = QueryRouter()
            classification = router.classify(query)
            query_type = classification['type']  # 'greeting'/'thanks'/'document'/'out_of_scope'

            logger.info(f"๐Ÿ“ Classified as: {query_type} "
                        f"(confidence: {classification['confidence']:.2f})")

            # 2. Handle by type (the four router types above cover all cases,
            #    so context/system_prompt are always bound before step 3)
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                # Skip retrieval
                context = None
                used_retrieval = False
                self._last_retrieved_docs = []

                # Dynamic prompt selection (GGUF variant)
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
                logger.info(f"โญ๏ธ RAG skipped: {query_type}")

            elif query_type == 'document':
                # Run RAG
                context = self._retrieve_and_format(query)
                used_retrieval = True

                # Dynamic prompt (GGUF variant, context included)
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")
                logger.info(f"๐Ÿ” RAG performed: {len(self._last_retrieved_docs)} documents")

            # 3. Generate the answer (passing system_prompt)
            answer = self.generator.chat(
                question=query,
                context=context,
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results (same shape as RAGPipeline)
            return {
                'answer': answer,
                'sources': self._format_sources(self._last_retrieved_docs),
                'used_retrieval': used_retrieval,
                'query_type': query_type,
                'search_mode': self.search_mode if used_retrieval else 'direct',
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()

    def set_search_config(
        self,
        search_mode: str = None,
        top_k: int = None,
        alpha: float = None
    ):
        """Change retrieval settings."""
        if search_mode is not None:
            self.search_mode = search_mode
        if top_k is not None:
            self.top_k = top_k
        if alpha is not None:
            self.alpha = alpha

        logger.info(
            f"๐Ÿ”ง Retrieval settings changed: mode={self.search_mode}, "
            f"top_k={self.top_k}, alpha={self.alpha}"
        )
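
A minimal usage sketch of the pipeline above (editor's sketch, not part of the commit: it assumes this file lives at src/generator/generator_gguf.py, as the base variant below references it, plus a populated Chroma store and valid OpenAI credentials in RAGConfig; the query is hypothetical):

from src.generator.generator_gguf import GGUFRAGPipeline

pipeline = GGUFRAGPipeline(n_gpu_layers=0)  # CPU-only override of the config default
result = pipeline.generate_answer("์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?")

print(result['answer'])
print(result['query_type'], result['search_mode'], f"{result['elapsed_time']:.1f}s")
for source in result['sources'][:3]:
    print(source['filename'], source['score_type'], source['score'])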
src/generator/generator_gguf_base.py ADDED
@@ -0,0 +1,516 @@
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """Initialize the generator."""
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized (base model)")

    def load_model(self) -> None:
        """
        Load the GGUF model.

        Uses the base model: reads BASE_MODEL_HUB_REPO from the config.
        """

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                # Base-model info from the config
                base_model_repo = getattr(
                    self.config,
                    'BASE_MODEL_HUB_REPO',
                    'beomi/Llama-3-Open-Ko-8B-gguf'
                )
                base_model_filename = getattr(
                    self.config,
                    'BASE_MODEL_HUB_FILENAME',
                    'ggml-model-Q4_K_M.gguf'
                )
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading base model: {base_model_repo}")
                logger.info(f"   Filename: {base_model_filename}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=base_model_repo,
                    filename=base_model_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False
                )

                logger.info(f"โœ… Base model download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local base model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading base GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… Base GGUF model loaded!")
            logger.info(f"   - model: {base_model_repo if use_model_hub else 'local'}")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """Simple prompt formatting for the GGUF model."""
        if system_prompt is None:
            system_prompt = self.system_prompt

        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """Generate a response for a formatted prompt."""
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,
                stop=[
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    "\n\n",
                    "?",
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """Generate a response to a question."""
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        response = self.generate(prompt, **kwargs)

        return response


class GGUFBaseRAGPipeline:
    """
    Base model + RAG pipeline.

    - Uses the base model (beomi/Llama-3-Open-Ko-8B)
    - Keeps RAG
    - Same behavior as the existing generator_gguf.py
    """

    def __init__(
        self,
        config=None,
        model: str = None,
        top_k: int = None,
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None,
        search_mode: str = None,
        alpha: float = None
    ):
        """Initialize."""
        self.config = config or RAGConfig()

        # Retrieval settings
        self.top_k = top_k or getattr(self.config, 'DEFAULT_TOP_K', 10)
        self.search_mode = search_mode or getattr(self.config, 'DEFAULT_SEARCH_MODE', 'hybrid_rerank')
        self.alpha = alpha if alpha is not None else getattr(self.config, 'DEFAULT_ALPHA', 0.5)

        # Initialize the retriever
        logger.info("Initializing RAGRetriever...")
        from src.retriever.retriever import RAGRetriever
        self.retriever = RAGRetriever(config=self.config)

        # GGUF settings
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path (unused here; the model is downloaded from the Hub)
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator... (base model)")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model
        logger.info("Loading base GGUF model...")
        self.generator.load_model()

        # Router
        self.router = QueryRouter()

        # Chat history
        self.chat_history: List[Dict] = []

        # Last retrieval results
        self._last_retrieved_docs = []

        logger.info("โœ… GGUFBaseRAGPipeline initialized")
        logger.info(f"   - search mode: {self.search_mode}")
        logger.info(f"   - default top_k: {self.top_k}")

    def _retrieve_and_format(self, query: str) -> str:
        """Run retrieval and format the context."""
        # Retrieve documents according to the search mode
        if self.search_mode == "embedding":
            docs = self.retriever.search(query, top_k=self.top_k)
        elif self.search_mode == "embedding_rerank":
            docs = self.retriever.search_with_rerank(query, top_k=self.top_k)
        elif self.search_mode == "hybrid":
            docs = self.retriever.hybrid_search(
                query, top_k=self.top_k, alpha=self.alpha
            )
        elif self.search_mode == "hybrid_rerank":
            docs = self.retriever.hybrid_search_with_rerank(
                query, top_k=self.top_k, alpha=self.alpha
            )
        else:
            docs = self.retriever.search(query, top_k=self.top_k)

        # Remember the last retrieval results
        self._last_retrieved_docs = docs

        # Format the context
        return self._format_context(docs)

    def _format_context(self, retrieved_docs: list) -> str:
        """Convert retrieved documents into a context string."""
        if not retrieved_docs:
            return "๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        context_parts = []
        max_context_chars = 8000

        current_length = 0
        for i, doc in enumerate(retrieved_docs, 1):
            doc_text = f"[๋ฌธ์„œ {i}]\n{doc['content']}\n"
            doc_length = len(doc_text)

            if current_length + doc_length > max_context_chars:
                logger.warning(f"โš ๏ธ Context length limit: using only {i-1} documents")
                break

            context_parts.append(doc_text)
            current_length += doc_length

        return "\n".join(context_parts)

    def _format_sources(self, retrieved_docs: list) -> list:
        """Convert retrieved documents into the sources format."""
        sources = []
        for doc in retrieved_docs:
            source_info = {
                'content': doc['content'],
                'metadata': doc['metadata'],
                'filename': doc.get('filename', 'N/A'),
                'organization': doc.get('organization', 'N/A')
            }

            if 'rerank_score' in doc:
                source_info['score'] = doc['rerank_score']
                source_info['score_type'] = 'rerank'
            elif 'hybrid_score' in doc:
                source_info['score'] = doc['hybrid_score']
                source_info['score_type'] = 'hybrid'
            elif 'relevance_score' in doc:
                source_info['score'] = doc['relevance_score']
                source_info['score_type'] = 'embedding'
            else:
                source_info['score'] = 0
                source_info['score_type'] = 'unknown'

            sources.append(source_info)

        return sources

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """Generate an answer (base model + RAG)."""
        try:
            start_time = time.time()

            # Apply parameters
            if top_k is not None:
                self.top_k = top_k
            if search_mode is not None:
                self.search_mode = search_mode
            if alpha is not None:
                self.alpha = alpha

            # Decide whether to retrieve, via the router
            classification = self.router.classify(query)
            query_type = classification['type']

            logger.info(f"๐Ÿ“ Classified as: {query_type} (confidence: {classification['confidence']:.2f})")

            # Handle by type
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                # Skip retrieval
                context = None
                used_retrieval = False
                self._last_retrieved_docs = []

                # Dynamic prompt
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
                logger.info(f"โญ๏ธ RAG skipped: {query_type}")

            elif query_type == 'document':
                # Run RAG
                context = self._retrieve_and_format(query)
                used_retrieval = True

                # Dynamic prompt
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")
                logger.info(f"๐Ÿ” RAG performed: {len(self._last_retrieved_docs)} documents")

            # Generate the answer
            answer = self.generator.chat(
                question=query,
                context=context,
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results
            return {
                'answer': answer,
                'sources': self._format_sources(self._last_retrieved_docs),
                'used_retrieval': used_retrieval,
                'query_type': query_type,
                'search_mode': self.search_mode if used_retrieval else 'direct',
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()

    def set_search_config(
        self,
        search_mode: str = None,
        top_k: int = None,
        alpha: float = None
    ):
        """Change retrieval settings."""
        if search_mode is not None:
            self.search_mode = search_mode
        if top_k is not None:
            self.top_k = top_k
        if alpha is not None:
            self.alpha = alpha

        logger.info(
            f"๐Ÿ”ง Retrieval settings changed: mode={self.search_mode}, "
            f"top_k={self.top_k}, alpha={self.alpha}"
        )
src/generator/generator_gguf_no_rag.py ADDED
@@ -0,0 +1,396 @@
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """Initialize the generator."""
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized")

    def load_model(self) -> None:
        """Load the GGUF model."""

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                model_hub_repo = getattr(self.config, 'MODEL_HUB_REPO', 'beomi/Llama-3-Open-Ko-8B-gguf')
                model_hub_filename = getattr(self.config, 'MODEL_HUB_FILENAME', 'ggml-model-Q4_K_M.gguf')
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading from the Model Hub: {model_hub_repo}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=model_hub_repo,
                    filename=model_hub_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False
                )

                logger.info(f"โœ… Download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… GGUF model loaded!")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")
                logger.warning("   Possibly out of memory. Try lowering n_gpu_layers.")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """Simple prompt formatting for the GGUF model."""
        # Pick the system prompt
        if system_prompt is None:
            system_prompt = self.system_prompt

        # Include context if provided
        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        # Simple Korean template
        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """Generate a response for a formatted prompt."""
        # Make sure the model is loaded
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        # Resolve parameters
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            # Generate
            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,
                stop=[
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    "\n\n",
                    "?",
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            # Extract the response
            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """Generate a response to a question (convenience method)."""
        # Format the prompt
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        # Generate the response
        response = self.generate(prompt, **kwargs)

        return response


class GGUFNoRAGPipeline:
    """
    QLoRA-model-only pipeline (RAG removed).

    - Retriever removed entirely
    - Router kept (to handle greeting/thanks)
    - Measures pure model performance
    """

    def __init__(
        self,
        config=None,
        model: str = None,
        top_k: int = None,
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None
    ):
        """Initialize."""
        self.config = config or RAGConfig()

        # GGUF settings
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator... (no RAG)")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")
        logger.info(f"   Threads: {gguf_n_threads}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model
        logger.info("Loading GGUF model...")
        self.generator.load_model()

        # No retriever (removed entirely)
        self.retriever = None

        # Router (to handle greeting/thanks)
        self.router = QueryRouter()

        # Chat history
        self.chat_history: List[Dict] = []

        logger.info("โœ… GGUFNoRAGPipeline initialized (RAG removed)")
        logger.info("   - Retriever: โŒ none")
        logger.info("   - Router: โœ… present (for greeting/thanks)")

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """
        Generate an answer (no RAG).

        Args:
            query: Question
            top_k: Unused (compatibility only)
            search_mode: Unused (compatibility only)
            alpha: Unused (compatibility only)

        Returns:
            dict: answer, sources, search_mode, usage, elapsed_time, used_retrieval
        """
        try:
            start_time = time.time()

            # Classify the query with the router
            classification = self.router.classify(query)
            query_type = classification['type']

            logger.info(f"๐Ÿ“ Classified as: {query_type} (confidence: {classification['confidence']:.2f})")

            # Dynamic prompt selection
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
            else:
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")

            # Always generate without RAG (context=None)
            answer = self.generator.chat(
                question=query,
                context=None,  # no context
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results
            return {
                'answer': answer,
                'sources': [],            # no sources
                'used_retrieval': False,  # no retrieval performed
                'query_type': query_type,
                'search_mode': 'none',    # no search mode
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()
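
With all three pipeline variants in place, here is a sketch of the ablation this commit sets up (editor's sketch, not part of the commit: module paths are inferred from the file names above, the queries and CPU-only setting are hypothetical, and running all three sequentially needs enough RAM for repeated 8B model loads):

from src.generator.generator_gguf import GGUFRAGPipeline            # QLoRA-tuned model + RAG
from src.generator.generator_gguf_base import GGUFBaseRAGPipeline   # base model + RAG
from src.generator.generator_gguf_no_rag import GGUFNoRAGPipeline   # QLoRA-tuned model, no RAG

query = "์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?"

for name, cls in [("rag", GGUFRAGPipeline),
                  ("base_rag", GGUFBaseRAGPipeline),
                  ("no_rag", GGUFNoRAGPipeline)]:
    pipe = cls(n_gpu_layers=0)  # CPU-only so the runs stay comparable
    result = pipe.generate_answer(query)
    print(f"[{name}] retrieval={result['used_retrieval']} "
          f"time={result['elapsed_time']:.1f}s")
    print(result['answer'], "\n")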
src/retriever/main.py ADDED
@@ -0,0 +1,67 @@
import os
from RAG_pipeline_v1.rag_config import RAGConfig
from RAG_pipeline_v1.rag_data_processing import RAGVectorDBPipeline
from RAG_pipeline_v1.rag_pipeline import RAGPipeline
from RAG_pipeline_v1.rag_evaluator import RAGEvaluator


def main():
    """Main entry point."""

    # ===== Environment setup =====
    print("="*60)
    print("Initializing the RAG system")
    print("="*60)

    os.environ["OPENAI_API_KEY"] = RAGConfig.OPENAI_API_KEY

    config = RAGConfig()
    config.validate()
    print(config)

    # ===== 1. Build the vector DB (first run only) =====
    # Uncomment to run
    # print("\n" + "="*60)
    # print("Building the vector DB")
    # print("="*60)
    # db_pipeline = RAGVectorDBPipeline(config)
    # vectorstore = db_pipeline.build()
    # db_pipeline.test_search()

    # ===== 2. Initialize the RAG pipeline =====
    print("\n" + "="*60)
    print("Initializing the RAG pipeline")
    print("="*60)

    rag = RAGPipeline(config=config)

    # ===== 3. Test queries (Korean, matching the indexed documents) =====
    print("\n" + "="*60)
    print("Test queries")
    print("="*60)

    test_queries = [
        "ํ•œ์˜๋Œ€ํ•™๊ต์˜ ํŠน์„ฑํ™” ๊ต์œกํ™˜๊ฒฝ ๊ตฌ์ถ• ์‚ฌ์—…์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
        "์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?",
    ]

    for query in test_queries:
        result = rag.generate_answer(query)
        rag.print_result(result)
        print("\n")

    # ===== 4. Evaluation =====
    print("\n" + "="*60)
    print("System evaluation")
    print("="*60)

    evaluator = RAGEvaluator(rag)
    eval_results = evaluator.evaluate()

    print("\n" + "="*60)
    print("โœ… All tasks complete")
    print("="*60)


if __name__ == "__main__":
    main()
src/retriever/retriever.py ADDED
@@ -0,0 +1,313 @@
from langchain_chroma import Chroma
from langchain_openai.embeddings import OpenAIEmbeddings
from langsmith import traceable
import time
import os
from rank_bm25 import BM25Okapi
import numpy as np
from sentence_transformers import CrossEncoder

from src.utils.config import RAGConfig


class RAGRetriever:
    """RAG retrieval system (hybrid search + re-ranker)."""

    def __init__(self, config: RAGConfig = None):
        self.config = config or RAGConfig()
        self.vectorstore = None
        self.embeddings = None

        self._initialize_embeddings()
        self._create_vectorstore()
        self._initialize_bm25()
        self._initialize_reranker()

    def _initialize_embeddings(self):
        """Initialize the embedding model."""
        os.environ["OPENAI_API_KEY"] = self.config.OPENAI_API_KEY

        self.embeddings = OpenAIEmbeddings(
            model=self.config.EMBEDDING_MODEL_NAME
        )

    def _create_vectorstore(self):
        """Load the existing vector store."""
        self.vectorstore = Chroma(
            embedding_function=self.embeddings,
            persist_directory=self.config.DB_DIRECTORY,
            collection_name=self.config.COLLECTION_NAME
        )

    def _initialize_bm25(self):
        """Build the BM25 index."""
        all_docs = self.vectorstore.get()

        self.doc_texts = all_docs['documents']
        self.doc_ids = all_docs['ids']
        self.doc_metadatas = all_docs['metadatas']

        self.content_to_id = {text: doc_id for text, doc_id in zip(self.doc_texts, self.doc_ids)}

        tokenized_docs = [doc.split() for doc in self.doc_texts]
        self.bm25 = BM25Okapi(tokenized_docs)

        print(f"โœ… BM25 index built: {len(self.doc_texts)} documents")

    def _initialize_reranker(self):
        """Initialize the re-ranker."""
        self.reranker = CrossEncoder('BAAI/bge-reranker-base')
        print("โœ… Re-ranker initialized (bge-reranker-base)")

    @staticmethod
    def _min_max_normalize(scores):
        """Normalize scores to the 0-1 range."""
        scores = np.array(scores)
        min_score = scores.min()
        max_score = scores.max()

        if max_score == min_score:
            return np.full_like(scores, 0.5, dtype=float)

        return (scores - min_score) / (max_score - min_score)
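
    # Worked example (editor's illustration): _min_max_normalize([2.0, 5.0, 8.0])
    # gives (x - 2) / (8 - 2) -> [0.0, 0.5, 1.0]; a constant score vector maps
    # to all 0.5s instead of dividing by zero. Normalizing both score sources
    # to [0, 1] is what makes the weighted sum in hybrid_search meaningful.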
73
+
74
+ def _find_doc_id_by_content(self, content):
75
+ """๋ฌธ์„œ content๋กœ ID ์ฐพ๊ธฐ"""
76
+ return self.content_to_id.get(content, None)
77
+
78
+ def _rerank(self, query, documents, top_k):
79
+ """
80
+ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์žฌ์ •๋ ฌ
81
+
82
+ Args:
83
+ query: ๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ
84
+ documents: hybrid_search ๊ฒฐ๊ณผ ๋ฆฌ์ŠคํŠธ
85
+ top_k: ์ตœ์ข… ๋ฐ˜ํ™˜ํ•  ๋ฌธ์„œ ์ˆ˜
86
+
87
+ Returns:
88
+ ์žฌ์ •๋ ฌ๋œ ์ƒ์œ„ k๊ฐœ ๋ฌธ์„œ
89
+ """
90
+ if len(documents) == 0:
91
+ return []
92
+
93
+ # 1. (query, document) ์Œ ์ƒ์„ฑ
94
+ pairs = [[query, doc['content']] for doc in documents]
95
+
96
+ # 2. CrossEncoder๋กœ ์ ์ˆ˜ ๊ณ„์‚ฐ
97
+ scores = self.reranker.predict(pairs)
98
+
99
+ # 3. ์ ์ˆ˜๋ฅผ ๋ฌธ์„œ์— ์ถ”๊ฐ€
100
+ for i, doc in enumerate(documents):
101
+ doc['rerank_score'] = float(scores[i])
102
+
103
+ # 4. ์ •๋ ฌ ๋ฐ ๋ฐ˜ํ™˜
104
+ sorted_docs = sorted(documents,
105
+ key=lambda x: x['rerank_score'],
106
+ reverse=True)
107
+
108
+ return sorted_docs[:top_k]
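
    # Illustration (editor's note, hypothetical strings): for passages d1, d2
    # the re-ranker scores each (query, passage) pair jointly, e.g.
    #     self.reranker.predict([["์‚ฌ์—… ๊ธฐ๊ฐ„์€?", d1], ["์‚ฌ์—… ๊ธฐ๊ฐ„์€?", d2]])
    # A cross-encoder reads query and passage together, so it is more accurate
    # but slower than first-stage retrieval; that is why _rerank only re-scores
    # the small candidate set handed to it rather than the whole corpus.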

    @traceable(
        name="RAG_Hybrid_Search",
        metadata={"component": "retriever", "version": "2.0"}
    )
    def hybrid_search(self, query, top_k=None, alpha=0.5):
        """
        Hybrid search: BM25 + embeddings combined.

        Args:
            query: Search query
            top_k: Number of documents to return
            alpha: Embedding weight (0-1)
        """
        start_time = time.time()

        if top_k is None:
            top_k = self.config.DEFAULT_TOP_K

        # 1. BM25 search
        tokenized_query = query.split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        bm25_normalized = self._min_max_normalize(bm25_scores)

        # 2. Embedding search
        embedding_results = self.vectorstore.similarity_search_with_score(
            query, k=min(top_k * 3, len(self.doc_texts))
        )

        # 3. Normalize the embedding scores
        embedding_scores_raw = {}
        for doc, distance in embedding_results:
            doc_id = self._find_doc_id_by_content(doc.page_content)
            if doc_id:
                embedding_scores_raw[doc_id] = 1 / (1 + distance)

        if embedding_scores_raw:
            embed_values = np.array(list(embedding_scores_raw.values()))
            embed_normalized = self._min_max_normalize(embed_values)
            embedding_scores = dict(zip(embedding_scores_raw.keys(), embed_normalized))
        else:
            embedding_scores = {}

        # 4. Compute the hybrid scores
        hybrid_scores = {}
        for i, doc_id in enumerate(self.doc_ids):
            bm25_score = bm25_normalized[i]
            embed_score = embedding_scores.get(doc_id, 0)
            hybrid_scores[doc_id] = (1 - alpha) * bm25_score + alpha * embed_score

        # 5. Sort and take the top k
        sorted_ids = sorted(hybrid_scores.keys(),
                            key=lambda x: hybrid_scores[x],
                            reverse=True)
        top_ids = sorted_ids[:top_k]

        # 6. Format the results
        # ('ํŒŒ์ผ๋ช…' = filename, '๋ฐœ์ฃผ ๊ธฐ๊ด€' = ordering organization: Korean metadata keys in the DB)
        formatted_results = []
        for doc_id in top_ids:
            idx = self.doc_ids.index(doc_id)
            formatted_results.append({
                'content': self.doc_texts[idx],
                'metadata': self.doc_metadatas[idx],
                'hybrid_score': hybrid_scores[doc_id],
                'bm25_score': float(bm25_normalized[idx]),
                'embed_score': embedding_scores.get(doc_id, 0),
                'filename': self.doc_metadatas[idx].get('ํŒŒ์ผ๋ช…', 'N/A'),
                'organization': self.doc_metadatas[idx].get('๋ฐœ์ฃผ ๊ธฐ๊ด€', 'N/A')
            })

        end_time = time.time()
        print(f"๐Ÿ” Hybrid search done: {len(formatted_results)} results (alpha={alpha}, {end_time-start_time:.3f}s)")
        return formatted_results
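
    # Worked example (editor's illustration, hypothetical numbers): with
    # alpha=0.5, a document whose normalized BM25 score is 0.8 and normalized
    # embedding score is 0.4 gets
    #     hybrid = (1 - 0.5) * 0.8 + 0.5 * 0.4 = 0.6
    # alpha=0.0 reduces to pure BM25 and alpha=1.0 to pure embedding search,
    # which is exactly how search_with_mode() implements its "bm25" mode below.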
182
+
+     @traceable(
+         name="RAG_Hybrid_Search_Rerank",
+         metadata={"component": "retriever", "version": "3.0"}
+     )
+     def hybrid_search_with_rerank(self, query, top_k=None, alpha=0.5, rerank_candidates=None):
+         """
+         Hybrid search + re-ranking.
+
+         Args:
+             query: search query
+             top_k: number of documents to return in the end
+             alpha: BM25/embedding weight
+             rerank_candidates: number of candidates to re-rank (defaults to top_k * 3)
+         """
+         start_time = time.time()
+
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if rerank_candidates is None:
+             rerank_candidates = top_k * 3
+
+         # 1. Fetch candidate documents with hybrid search
+         candidates = self.hybrid_search(query, top_k=rerank_candidates, alpha=alpha)
+
+         # 2. Re-rank the candidates
+         if len(candidates) > 0:
+             results = self._rerank(query, candidates, top_k)
+         else:
+             results = []
+
+         end_time = time.time()
+         print(f"🔄 Re-ranking done: {len(candidates)} → {len(results)} docs ({end_time-start_time:.3f}s)")
+
+         return results
+
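Retrieve-then-rerank trades latency for precision: the hybrid stage over-fetches (3x top_k by default) and the CrossEncoder keeps only the best. A hedged usage sketch, assuming an already-constructed retriever instance; the query string and pool size are illustrative:

docs = retriever.hybrid_search_with_rerank(
    query="data center construction RFP",  # illustrative query
    top_k=5,               # documents returned to the caller
    alpha=0.5,             # even BM25/embedding blend
    rerank_candidates=20,  # widen the pool the CrossEncoder scores
)
for d in docs:
    print(f"{d['rerank_score']:.3f}", d['filename'])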
+     def search_with_mode(self, query, top_k=None, mode="hybrid_rerank", alpha=0.5):
+         """Dispatch to a search strategy by mode name."""
+         if mode == "embedding":
+             return self.search(query, top_k)
+         elif mode == "bm25":
+             return self.hybrid_search(query, top_k, alpha=0.0)
+         elif mode == "hybrid":
+             return self.hybrid_search(query, top_k, alpha=alpha)
+         elif mode == "hybrid_rerank":
+             return self.hybrid_search_with_rerank(query, top_k, alpha)
+         else:
+             raise ValueError(f"Unknown mode: {mode}")
+
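Reading the dispatch table: alpha=0.0 zeroes out the embedding term in the hybrid formula, which is why the "bm25" branch can reuse hybrid_search. Illustrative calls (a constructed retriever instance is assumed):

q = "data center construction RFP"
retriever.search_with_mode(q, mode="embedding")          # dense vectors only
retriever.search_with_mode(q, mode="bm25")               # alpha=0 -> lexical only
retriever.search_with_mode(q, mode="hybrid", alpha=0.7)  # lean toward embeddings
retriever.search_with_mode(q, mode="hybrid_rerank")      # default two-stage pipeline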
+     @traceable(
+         name="RAG_Retriever_Search",
+         metadata={"component": "retriever", "version": "1.0"}
+     )
+     def search(self, query: str, top_k: int | None = None, filter_metadata: dict | None = None):
+         """
+         Similar-document search (embedding-based).
+         """
+         start_time = time.time()
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if filter_metadata:
+             results = self.vectorstore.similarity_search_with_score(
+                 query, k=top_k, filter=filter_metadata
+             )
+         else:
+             results = self.vectorstore.similarity_search_with_score(
+                 query, k=top_k
+             )
+
+         formatted_results = []
+         for doc, score in results:
+             formatted_results.append({
+                 'content': doc.page_content,
+                 'metadata': doc.metadata,
+                 'distance': score,
+                 # Distance -> bounded similarity, consistent with hybrid_search
+                 'relevance_score': 1 / (1 + score),
+                 'filename': doc.metadata.get('파일명', 'N/A'),
+                 'organization': doc.metadata.get('발주 기관', 'N/A')
+             })
+
+         end_time = time.time()
+         print(f"🔍 Search done: {len(results)} docs ({end_time-start_time:.3f}s)")
+         return formatted_results
+
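On the score conversion: Chroma's similarity_search_with_score returns a distance (lower is better), so relevance_score maps it into a bounded similarity via 1 / (1 + distance), the same conversion hybrid_search applies before normalizing. A quick check of the mapping:

for d in (0.0, 0.5, 2.0):
    print(d, 1 / (1 + d))  # 1.0, 0.667, 0.333: stays in (0, 1], monotonically decreasing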
+     def search_with_rerank(self, query, top_k=None, rerank_candidates=None):
+         """
+         Embedding search + re-ranking.
+
+         Args:
+             query: search query
+             top_k: number of documents to return in the end
+             rerank_candidates: number of candidates to re-rank
+
+         Returns:
+             Re-ranked document list
+         """
+         start_time = time.time()
+
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if rerank_candidates is None:
+             rerank_candidates = top_k * 3
+
+         # 1. Fetch candidates with embedding search
+         candidates = self.search(query, top_k=rerank_candidates)
+
+         # 2. Re-rank the candidates
+         if len(candidates) > 0:
+             results = self._rerank(query, candidates, top_k)
+         else:
+             results = []
+
+         end_time = time.time()
+         print(f"🔄 Embedding + re-ranking done: {len(candidates)} → {len(results)} docs ({end_time-start_time:.3f}s)")
+
+         return results
+
+     def search_by_organization(self, query: str, organization: str, top_k: int | None = None):
+         """Search restricted to a single issuing organization."""
+         return self.search(
+             query, top_k=top_k, filter_metadata={'발주 기관': organization}
+         )
+
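The organization filter is a plain Chroma metadata equality match on a key stored at indexing time, so the same mechanism extends to any other metadata field. A hedged usage sketch (the organization value is a placeholder):

# Equivalent to: retriever.search(query, filter_metadata={'발주 기관': organization})
docs = retriever.search_by_organization(
    "system maintenance RFP",
    organization="National IT Agency",  # placeholder value
)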
+     def get_retriever(self):
+         """Return a Retriever for use in LangChain chains."""
+         return self.vectorstore.as_retriever(
+             search_type="similarity",
+             search_kwargs={"k": self.config.DEFAULT_TOP_K}
+         )
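A sketch of consuming the LangChain retriever returned above; retrievers are Runnables in recent langchain-core, so invoke() performs the search (the query string is illustrative, not from this repo):

lc_retriever = retriever.get_retriever()
docs = lc_retriever.invoke("data center construction RFP")  # list[Document]
for doc in docs:
    print(doc.metadata.get('파일명', 'N/A'))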
src/utils/config.py ADDED
@@ -0,0 +1,193 @@
+ import os
+ from dotenv import load_dotenv
+
+
+ class Config:
+     """Unified configuration class for the RAG system."""
+
+     def __init__(self):
+         # Load the .env file
+         load_dotenv()
+
+         # ===== API keys =====
+         self.OPENAI_API_KEY = self._get_api_key()
+
+         # ===== Paths =====
+         # Preprocessing
+         self.META_CSV_PATH = "./data/data_list.csv"
+         self.BASE_FOLDER_PATH = "./data/files/"
+         self.OUTPUT_CHUNKS_PATH = "./data/rag_chunks_final.csv"
+
+         # RAG - environment variable first, default otherwise
+         self.RAG_INPUT_PATH = "./data/rag_chunks_final.csv"
+         self.DB_DIRECTORY = os.getenv("CHROMA_DB_PATH", "./chroma_db")
+
+         # ===== Preprocessing settings =====
+         self.CHUNK_SIZE = 1000
+         self.CHUNK_OVERLAP = 200
+         self.SEPARATORS = ["\n\n", "\n", " ", ""]
+         self.MIN_TEXT_LENGTH = 100
+
+         # ===== Embedding settings =====
+         self.EMBEDDING_MODEL_NAME = "text-embedding-3-small"
+         self.BATCH_SIZE = 50
+         self.MAX_TOKENS_PER_BATCH = 250000
+
+         # Chunk validation bounds
+         self.MIN_CHUNK_LENGTH = 10
+         self.MAX_CHUNK_LENGTH = 10000
+
+         # ===== Vector DB settings =====
+         self.COLLECTION_NAME = "rag_documents"
+
+         # ===== Search settings =====
+         self.DEFAULT_TOP_K = 10
+         self.DEFAULT_ALPHA = 0.5
+         self.DEFAULT_SEARCH_MODE = "hybrid_rerank"
+
+         # ===== LLM settings =====
+         self.LLM_MODEL_NAME = "gpt-4o-mini"
+         self.DEFAULT_TEMPERATURE = 0.0
+         self.DEFAULT_MAX_TOKENS = 1000
+
+         # System prompt ("You are an expert in analyzing and summarizing RFPs.")
+         self.SYSTEM_PROMPT = "당신은 RFP(제안요청서) 분석 및 요약 전문가입니다."
+
+         # ===== GGUF local model settings =====
+         # Whether to use the Model Hub (environment variable first)
+         self.USE_MODEL_HUB = os.getenv("USE_MODEL_HUB", "true").lower() == "true"
+
+         # Hugging Face Model Hub settings
+         # 1. QLoRA model (fine-tuned) - for the existing service
+         self.MODEL_HUB_REPO = os.getenv(
+             "MODEL_HUB_REPO",
+             "Dongjin1203/RFP_Documents_chatbot"
+         )
+         self.MODEL_HUB_FILENAME = os.getenv(
+             "MODEL_HUB_FILENAME",
+             "Llama-3-Open-Ko-8B.Q4_K_M.gguf"
+         )
+
+         # 2. Base model (no PEFT) - for comparison experiments
+         self.BASE_MODEL_HUB_REPO = os.getenv(
+             "BASE_MODEL_HUB_REPO",
+             "beomi/Llama-3-Open-Ko-8B-gguf"
+         )
+         self.BASE_MODEL_HUB_FILENAME = os.getenv(
+             "BASE_MODEL_HUB_FILENAME",
+             "ggml-model-Q4_K_M.gguf"
+         )
+
+         # Shared cache directory
+         self.MODEL_CACHE_DIR = os.getenv("MODEL_CACHE_DIR", ".cache/models")
+
+         # Local path (used when USE_MODEL_HUB=false)
+         self.GGUF_MODEL_PATH = os.getenv("GGUF_MODEL_PATH", ".cache/models/Llama-3-Open-Ko-8B.Q4_K_M.gguf")
+
+         # GGUF GPU settings (tuned for T4 Medium with an 8B model)
+         self.GGUF_N_GPU_LAYERS = int(os.getenv("GGUF_N_GPU_LAYERS", "35"))  # load the full 8B model onto the T4 GPU
+         self.GGUF_N_CTX = int(os.getenv("GGUF_N_CTX", "2048"))  # context length
+         self.GGUF_N_THREADS = int(os.getenv("GGUF_N_THREADS", "4"))  # CPU threads (keep low when the GPU is used)
+         self.GGUF_MAX_NEW_TOKENS = int(os.getenv("GGUF_MAX_NEW_TOKENS", "512"))  # max generated tokens
+         self.GGUF_TEMPERATURE = float(os.getenv("GGUF_TEMPERATURE", "0.7"))  # generation diversity
+         self.GGUF_TOP_P = float(os.getenv("GGUF_TOP_P", "0.9"))  # nucleus sampling
+
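Every tunable above consults the environment first, so a single .env file drives the whole stack. A minimal sketch with placeholder values; only OPENAI_API_KEY is required, everything else falls back to the defaults shown in the code:

OPENAI_API_KEY=sk-...            # required; placeholder shown
CHROMA_DB_PATH=./chroma_db
USE_MODEL_HUB=true
MODEL_HUB_REPO=Dongjin1203/RFP_Documents_chatbot
MODEL_HUB_FILENAME=Llama-3-Open-Ko-8B.Q4_K_M.gguf
MODEL_CACHE_DIR=.cache/models
GGUF_N_GPU_LAYERS=35
GGUF_N_CTX=2048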
+     def _get_api_key(self) -> str:
+         """Load the API key from the environment."""
+         api_key = os.getenv("OPENAI_API_KEY")
+
+         if not api_key:
+             raise ValueError(
+                 "OPENAI_API_KEY is not set.\n"
+                 "Create a .env file in the project root and add OPENAI_API_KEY=your-key."
+             )
+
+         return api_key
+
+     def validate_preprocess(self):
+         """Validate the preprocessing settings."""
+         if not os.path.exists(self.META_CSV_PATH):
+             raise FileNotFoundError(
+                 f"Meta CSV file not found: {self.META_CSV_PATH}"
+             )
+
+         if not os.path.exists(self.BASE_FOLDER_PATH):
+             raise FileNotFoundError(
+                 f"Files folder not found: {self.BASE_FOLDER_PATH}"
+             )
+
+         output_dir = os.path.dirname(self.OUTPUT_CHUNKS_PATH)
+         if output_dir:
+             os.makedirs(output_dir, exist_ok=True)
+
+         return True
+
+     def validate_rag(self):
+         """Validate the RAG settings."""
+         if not self.OPENAI_API_KEY:
+             raise ValueError("OPENAI_API_KEY is not set")
+
+         return True
+
+     def validate_gguf(self):
+         """Validate the GGUF settings."""
+         if not self.USE_MODEL_HUB:
+             # When a local file is used, check that the path exists
+             if not os.path.exists(self.GGUF_MODEL_PATH):
+                 print(f"⚠️ Warning: GGUF model file not found: {self.GGUF_MODEL_PATH}")
+                 print("   Set USE_MODEL_HUB=true to download it automatically, or provide the model file.")
+
+         # Check the GPU layer setting
+         if self.GGUF_N_GPU_LAYERS > 0:
+             print(f"✅ GPU acceleration enabled: {self.GGUF_N_GPU_LAYERS} layers")
+         else:
+             print("⚠️ CPU-only mode (n_gpu_layers=0)")
+
+         return True
+
+     def validate_all(self):
+         """Validate all settings."""
+         self.validate_preprocess()
+         self.validate_rag()
+         self.validate_gguf()
+         return True
+
+     def validate(self):
+         """Validate settings (kept for backwards compatibility)."""
+         return self.validate_preprocess()
+
+     def print_gguf_config(self):
+         """Print the GGUF settings (for debugging)."""
+         print("\n" + "="*50)
+         print("GGUF model settings")
+         print("="*50)
+         print(f"Using Model Hub: {self.USE_MODEL_HUB}")
+
+         if self.USE_MODEL_HUB:
+             print("\n[QLoRA model]")
+             print(f"  Hub repo: {self.MODEL_HUB_REPO}")
+             print(f"  Hub filename: {self.MODEL_HUB_FILENAME}")
+
+             print("\n[Base model]")
+             print(f"  Hub repo: {self.BASE_MODEL_HUB_REPO}")
+             print(f"  Hub filename: {self.BASE_MODEL_HUB_FILENAME}")
+
+             print("\n[Shared]")
+             print(f"  Cache directory: {self.MODEL_CACHE_DIR}")
+         else:
+             print(f"Local path: {self.GGUF_MODEL_PATH}")
+         print("\nGPU settings:")
+         print(f"  - GPU layers: {self.GGUF_N_GPU_LAYERS}")
+         print(f"  - Context: {self.GGUF_N_CTX}")
+         print(f"  - Threads: {self.GGUF_N_THREADS}")
+         print("\nGeneration settings:")
+         print(f"  - Max tokens: {self.GGUF_MAX_NEW_TOKENS}")
+         print(f"  - Temperature: {self.GGUF_TEMPERATURE}")
+         print(f"  - Top-P: {self.GGUF_TOP_P}")
+         print("="*50 + "\n")
+
+
+ # Aliases for backwards compatibility
+ PreprocessConfig = Config
+ RAGConfig = Config
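Typical use, based only on the methods defined above:

config = Config()           # loads .env; raises ValueError if OPENAI_API_KEY is missing
config.validate_all()       # checks data paths, the API key, and the GGUF settings
config.print_gguf_config()  # dump the effective GGUF configuration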