Dongjin1203 committed
Commit 9630ae8 · 0 Parent(s)

Initial commit - RFPilot experiment

.dockerignore ADDED
@@ -0,0 +1,7 @@
+ __pycache__/
+ *.pyc
+ .git
+ .env
+ .venv
+ *.log
+ EOF
.gitignore ADDED
@@ -0,0 +1,234 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
+
+ # Models & DB
+ chroma_db/
+ models/
+ *.gguf
+ .cache/
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ env/
+ venv/
+
+ # IDE
+ .vscode/
+ .idea/
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Env
+ .env
+ .env.local
Dockerfile ADDED
@@ -0,0 +1,30 @@
+ FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
+
+ WORKDIR /app
+
+ # Install Python
+ RUN apt-get update && apt-get install -y \
+     python3.10 \
+     python3-pip \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ RUN pip install --no-cache-dir --upgrade pip
+
+ # Copy requirements.txt
+ COPY requirements.txt .
+
+ # llama-cpp-python (prebuilt CUDA wheel)
+ RUN pip install --no-cache-dir \
+     llama-cpp-python==0.2.90 \
+     --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+
+ # Remaining packages
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the source
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["python3", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Dongjin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,30 @@
+ ---
+ title: RFPilot Model Comparison
+ emoji: 🔬
+ colorFrom: blue
+ colorTo: green
+ sdk: docker
+ app_port: 7860
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # QLoRA_RAG_test
+ ---
+ This project is a follow-up study to the RFP document-summarization RAG chatbot project.
+
+ ## Problem
+ In the original service architecture, a RAG system was layered on top of the fine-tuned model, but we never verified whether that causes overfitting, or what effect it actually has.
+
+ ## Experimental procedure
+
+ - Prepare the QLoRA fine-tuned model.
+ - Prepare the original model without fine-tuning.
+ - Build an evaluation dataset.
+ - Test three conditions separately: fine-tuning only, RAG only without fine-tuning, and both combined.
+ - Review the results.
+
+ ---
+
+ ## Results
app.py ADDED
@@ -0,0 +1,284 @@
+ """
+ Experiment app for the Hugging Face Space.
+
+ Uses Gradio to run the experiment from a web UI and inspect the results.
+ """
+
+ import gradio as gr
+ import os
+ import json
+ import pandas as pd
+ from pathlib import Path
+ import matplotlib.pyplot as plt
+ from datetime import datetime
+
+ # Put the project root on the import path
+ import sys
+ sys.path.insert(0, str(Path(__file__).parent))
+
+ from src.eval_dataset import EvalDataset
+ from src.compare_models import ModelComparison
+ from src.analyze_results import ResultAnalyzer
+
+
+ class ExperimentApp:
+     """Holds the experiment state behind the Gradio UI."""
+
+     def __init__(self):
+         self.experiment = None
+         self.latest_result_file = None
+
+     def setup_environment(self, api_key: str) -> str:
+         """Set the environment variables the pipelines expect."""
+         if not api_key:
+             return "❌ Please enter an OpenAI API key."
+
+         os.environ['OPENAI_API_KEY'] = api_key
+         os.environ['USE_MODEL_HUB'] = 'true'
+         os.environ['GGUF_N_GPU_LAYERS'] = '35'
+
+         return "✅ Environment configured!"
+
+     def run_experiment(
+         self,
+         api_key: str,
+         distribution: str,
+         progress=gr.Progress()
+     ) -> tuple:
+         """Run the full comparison experiment."""
+         try:
+             # Environment setup
+             setup_msg = self.setup_environment(api_key)
+             if "❌" in setup_msg:
+                 return setup_msg, None, None
+
+             progress(0.1, desc="Environment configured...")
+
+             # Load the config
+             from src.utils.config import RAGConfig
+             config = RAGConfig()
+
+             progress(0.2, desc="Initializing the experiment...")
+
+             # Initialize the experiment
+             self.experiment = ModelComparison(
+                 config=config,
+                 output_dir="./experiments/results"
+             )
+
+             progress(0.3, desc="Loading models... (takes 3-5 min)")
+
+             # Load the three pipelines
+             self.experiment.load_models()
+
+             progress(0.5, desc="Running the experiment... (takes 10-20 min)")
+
+             # Run the experiment
+             results = self.experiment.run_experiment(
+                 distribution=distribution.lower(),
+                 save_results=True
+             )
+
+             progress(0.9, desc="Analyzing results...")
+
+             # Find the most recent result file
+             result_files = sorted(
+                 Path("./experiments/results").glob("results_*.json"),
+                 reverse=True
+             )
+             self.latest_result_file = str(result_files[0]) if result_files else None
+
+             # Build the text summary
+             summary = self._generate_summary(results)
+
+             # Build the results DataFrame
+             df = self._results_to_dataframe(results)
+
+             progress(1.0, desc="Done!")
+
+             return "✅ Experiment finished!", summary, df
+
+         except Exception as e:
+             return f"❌ Experiment failed: {str(e)}", None, None
+
+     def _generate_summary(self, results: dict) -> str:
+         """Build a plain-text summary of the results."""
+         summary = "=" * 60 + "\n"
+         summary += "Experiment result summary\n"
+         summary += "=" * 60 + "\n\n"
+
+         metadata = results['metadata']
+         summary += f"Timestamp: {metadata['timestamp']}\n"
+         summary += f"Distribution: {metadata['distribution']}\n"
+         summary += f"Models: {', '.join(metadata['models'])}\n"
+         summary += f"Total queries: {metadata['total_queries']}\n\n"
+
+         # Per-distribution summary
+         for dist_type, dist_results in results['results'].items():
+             summary += f"\n{'='*60}\n"
+             summary += f"{dist_type.upper()}\n"
+             summary += f"{'='*60}\n\n"
+
+             # Per-model statistics
+             model_stats = {}
+             for result in dist_results:
+                 model = result['model']
+                 if model not in model_stats:
+                     model_stats[model] = []
+                 model_stats[model].append(result)
+
+             for model, model_results in model_stats.items():
+                 success_count = sum(1 for r in model_results if r['success'])
+                 avg_time = sum(r['elapsed_time'] for r in model_results if r['success']) / max(success_count, 1)
+
+                 summary += f"[{model}]\n"
+                 summary += f"  Success: {success_count}/{len(model_results)}\n"
+                 summary += f"  Avg time: {avg_time:.3f}s\n\n"
+
+         return summary
+
+     def _results_to_dataframe(self, results: dict) -> pd.DataFrame:
+         """Flatten the results into a DataFrame."""
+         all_rows = []
+
+         for dist_type, dist_results in results['results'].items():
+             for result in dist_results:
+                 row = {
+                     'distribution': dist_type,
+                     'model': result['model'],
+                     'query': result['query'],
+                     'success': result['success'],
+                     'elapsed_time': result['elapsed_time'],
+                     'total_tokens': result.get('usage', {}).get('total_tokens', 0)
+                 }
+                 all_rows.append(row)
+
+         return pd.DataFrame(all_rows)
+
+     def analyze_results(self) -> tuple:
+         """Generate the analysis plots for the latest run."""
+         if not self.latest_result_file:
+             return "❌ Run the experiment first.", None, None, None, None
+
+         try:
+             analyzer = ResultAnalyzer(self.latest_result_file)
+
+             # Generate the plots
+             analyzer.plot_time_comparison()
+             analyzer.plot_token_comparison()
+             analyzer.plot_rag_usage()
+             analyzer.plot_overfitting_analysis()
+
+             # Plot file paths
+             analysis_dir = Path(self.latest_result_file).parent / "analysis"
+
+             time_plot = str(analysis_dir / "time_comparison.png")
+             token_plot = str(analysis_dir / "token_comparison.png")
+             rag_plot = str(analysis_dir / "rag_usage.png")
+             overfitting_plot = str(analysis_dir / "overfitting_analysis.png")
+
+             return (
+                 "✅ Analysis finished!",
+                 time_plot if Path(time_plot).exists() else None,
+                 token_plot if Path(token_plot).exists() else None,
+                 rag_plot if Path(rag_plot).exists() else None,
+                 overfitting_plot if Path(overfitting_plot).exists() else None
+             )
+
+         except Exception as e:
+             return f"❌ Analysis failed: {str(e)}", None, None, None, None
+
+
+ # Build the Gradio interface
+ def create_interface():
+     """Build the Gradio interface."""
+     app = ExperimentApp()
+
+     with gr.Blocks(title="RFPilot Model Comparison") as demo:
+         gr.Markdown("""
+         # 🔬 RFPilot Model Comparison
+
+         Compares the performance of three models (QLoRA+RAG, QLoRA only, Base+RAG).
+
+         ⚠️ **Note**: the experiment takes a long time to run (10-20 min).
+         """)
+
+         with gr.Tab("🚀 Run experiment"):
+             api_key_input = gr.Textbox(
+                 label="OpenAI API Key",
+                 type="password",
+                 placeholder="sk-..."
+             )
+
+             distribution_input = gr.Radio(
+                 choices=["All", "In", "Out"],
+                 value="All",
+                 label="Distribution"
+             )
+
+             run_btn = gr.Button("Start experiment", variant="primary")
+
+             status_output = gr.Textbox(label="Status", lines=2)
+             summary_output = gr.Textbox(label="Summary", lines=20)
+             results_output = gr.Dataframe(label="Results")
+
+             run_btn.click(
+                 fn=app.run_experiment,
+                 inputs=[api_key_input, distribution_input],
+                 outputs=[status_output, summary_output, results_output]
+             )
+
+         with gr.Tab("📊 Analyze results"):
+             analyze_btn = gr.Button("Start analysis", variant="primary")
+
+             analyze_status = gr.Textbox(label="Status")
+
+             with gr.Row():
+                 time_plot = gr.Image(label="Response time comparison")
+                 token_plot = gr.Image(label="Token usage comparison")
+
+             with gr.Row():
+                 rag_plot = gr.Image(label="RAG usage pattern")
+                 overfitting_plot = gr.Image(label="Overfitting analysis")
+
+             analyze_btn.click(
+                 fn=app.analyze_results,
+                 outputs=[
+                     analyze_status,
+                     time_plot,
+                     token_plot,
+                     rag_plot,
+                     overfitting_plot
+                 ]
+             )
+
+         with gr.Tab("ℹ️ Info"):
+             gr.Markdown("""
+             ## 📋 Models compared
+
+             | Model | Description |
+             |------|------|
+             | QLoRA + RAG | Existing service (QLoRA fine-tuning + RAG) |
+             | QLoRA only | RAG removed (QLoRA alone) |
+             | Base + RAG | PEFT removed (base model + RAG) |
+
+             ## 📊 Metrics
+
+             - **Overfitting**: In-Distribution vs Out-Distribution performance gap
+             - **Answer speed**: average response time
+             - **Token usage**: average token consumption
+             - **RAG usage pattern**: how often retrieval is used
+
+             ## ⏱️ Expected duration
+
+             - Model loading: 3-5 min
+             - Experiment run: 10-20 min (25 questions)
+             - Result analysis: 1-2 min
+             """)
+
+     return demo
+
+
+ if __name__ == "__main__":
+     demo = create_interface()
+     # Serve on 0.0.0.0:7860, the port the Docker Space exposes
+     # (share=True would open a Gradio tunnel instead of the exposed port).
+     demo.launch(server_name="0.0.0.0", server_port=7860)
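Because `ExperimentApp` keeps the experiment logic separate from the Gradio wiring, it can also be exercised without the UI. A minimal headless smoke test, assuming the `src` package imports cleanly and a valid key is supplied (the `sk-...` value below is a placeholder):

```python
# Headless smoke test for the Space app; mirrors what the "Run experiment"
# button does, minus Gradio. "sk-..." is a placeholder, not a real key.
from app import ExperimentApp

app = ExperimentApp()
print(app.setup_environment("sk-..."))  # only sets environment variables

# Pass a no-op progress callback so gr.Progress is not needed outside Gradio.
status, summary, df = app.run_experiment("sk-...", "In", progress=lambda *a, **k: None)
print(status)
if df is not None:
    print(df.head())
```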
data/eval_dataset.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "metadata": {
+     "version": "1.0",
+     "description": "RFPilot 평가 데이터셋",
+     "created_by": "manual_annotation"
+   },
+   "in_distribution": [
+     {
+       "query": "사업 제안서 제출 마감일은 언제인가요?",
+       "expected_answer": "2024년 3월 15일까지입니다.",
+       "category": "deadline",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_001.hwp",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     },
+     {
+       "query": "제안 요청서의 제출 서류는 무엇인가요?",
+       "expected_answer": "기술제안서, 가격제안서, 사업자등록증, 회사소개서가 필요합니다.",
+       "category": "requirements",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_001.hwp",
+       "metadata": {
+         "difficulty": "medium"
+       }
+     },
+     {
+       "query": "사업 예산 규모는 얼마인가요?",
+       "expected_answer": "총 5억원입니다.",
+       "category": "budget",
+       "expected_type": "document",
+       "source_doc": "RFP_2024_002.hwp",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     }
+   ],
+   "out_distribution": [
+     {
+       "query": "한국의 수도는 어디인가요?",
+       "expected_answer": "서울입니다.",
+       "category": "general_knowledge",
+       "expected_type": "out_of_scope",
+       "metadata": {
+         "difficulty": "easy"
+       }
+     },
+     {
+       "query": "파이썬에서 리스트와 튜플의 차이는 무엇인가요?",
+       "expected_answer": "리스트는 가변(mutable)이고, 튜플은 불변(immutable)입니다.",
+       "category": "programming",
+       "expected_type": "out_of_scope",
+       "metadata": {
+         "difficulty": "medium"
+       }
+     }
+   ]
+ }
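The dataset ships ground-truth `expected_answer` strings, while the comparison script further down records only latency, token, and retrieval statistics. A sketch of a simple answer-quality check that could be layered on top (the substring criterion and the `score_answer` helper are illustrative assumptions, not part of the repo):

```python
import json

def score_answer(expected: str, answer: str) -> bool:
    # Crude exact-substring criterion; real grading would need normalization
    # or an LLM judge. Illustrative only.
    return expected.strip() in (answer or "")

with open("data/eval_dataset.json", encoding="utf-8") as f:
    data = json.load(f)

for split in ("in_distribution", "out_distribution"):
    for sample in data[split]:
        model_answer = "..."  # would come from a pipeline's generate_answer()
        print(split, sample["category"],
              score_answer(sample["expected_answer"], model_answer))
```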
data/eval_template.csv ADDED
@@ -0,0 +1,3 @@
+ distribution,query,expected_answer,category,source_doc,metadata
+ in_distribution,사업 제안서 제출 마감일은 언제인가요?,2024년 3월 15일까지입니다.,deadline,RFP_2024_001.hwp,"{""difficulty"": ""easy""}"
+ out_distribution,한국의 수도는 어디인가요?,서울입니다.,general_knowledge,,"{""difficulty"": ""easy""}"
requirements.txt ADDED
@@ -0,0 +1,29 @@
+ # Core
+ python-dotenv>=1.0.0
+ openai>=1.0.0
+
+ # LLM & RAG
+ llama-cpp-python>=0.2.90
+ chromadb>=0.4.0
+ sentence-transformers>=2.2.0
+ rank-bm25>=0.2.2
+ huggingface-hub>=0.19.0
+
+ # Document processing
+ pypdf>=3.17.0
+ python-docx>=1.1.0
+ olefile>=0.47
+
+ # Data processing
+ pandas>=2.0.0
+ numpy>=1.24.0
+
+ # Visualization
+ matplotlib>=3.7.0
+ seaborn>=0.12.0
+
+ # Utilities
+ tqdm>=4.65.0
+
+ # Gradio
+ gradio>=6.0.0
src/analyze_results.py ADDED
File without changes
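`src/analyze_results.py` is committed empty, yet `app.py` imports `ResultAnalyzer` from it and calls four plotting methods that are expected to drop PNGs into an `analysis/` directory next to the result file. A minimal sketch of the interface those call sites imply; only the method names, the constructor argument, and the output filenames are taken from `app.py`, while the chart choices are assumptions:

```python
import json
from pathlib import Path

import matplotlib
matplotlib.use("Agg")  # render headlessly inside the Space
import matplotlib.pyplot as plt


class ResultAnalyzer:
    """Load a results_*.json file and save comparison plots as PNGs."""

    def __init__(self, result_file: str):
        self.result_file = Path(result_file)
        with open(self.result_file, encoding="utf-8") as f:
            self.results = json.load(f)
        # app.py looks for the PNGs in an "analysis" dir next to the JSON.
        self.analysis_dir = self.result_file.parent / "analysis"
        self.analysis_dir.mkdir(parents=True, exist_ok=True)

    def _avg_by_model(self, value_fn):
        """Average value_fn(record) per model over all successful records."""
        stats = {}
        for dist_results in self.results["results"].values():
            for r in dist_results:
                if r["success"]:
                    stats.setdefault(r["model"], []).append(value_fn(r))
        return {m: sum(v) / len(v) for m, v in stats.items() if v}

    def _bar(self, data, title, filename):
        fig, ax = plt.subplots()
        ax.bar(list(data.keys()), list(data.values()))
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(self.analysis_dir / filename)
        plt.close(fig)

    def plot_time_comparison(self):
        self._bar(self._avg_by_model(lambda r: r["elapsed_time"]),
                  "Average response time (s)", "time_comparison.png")

    def plot_token_comparison(self):
        self._bar(self._avg_by_model(lambda r: r.get("usage", {}).get("total_tokens", 0)),
                  "Average total tokens", "token_comparison.png")

    def plot_rag_usage(self):
        self._bar(self._avg_by_model(lambda r: 1.0 if r["used_retrieval"] else 0.0),
                  "RAG usage rate", "rag_usage.png")

    def plot_overfitting_analysis(self):
        # Success rate per model, in- vs out-of-distribution.
        counts = {}
        for dist, dist_results in self.results["results"].items():
            for r in dist_results:
                ok, total = counts.get((r["model"], dist), (0, 0))
                counts[(r["model"], dist)] = (ok + int(r["success"]), total + 1)
        models = sorted({m for m, _ in counts})
        fig, ax = plt.subplots()
        for dist in ("in_distribution", "out_distribution"):
            rates = [counts.get((m, dist), (0, 1))[0] / counts.get((m, dist), (0, 1))[1]
                     for m in models]
            ax.plot(models, rates, marker="o", label=dist)
        ax.set_title("Success rate: in vs out of distribution")
        ax.set_ylim(0, 1.05)
        ax.legend()
        fig.tight_layout()
        fig.savefig(self.analysis_dir / "overfitting_analysis.png")
        plt.close(fig)
```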
src/compare_models.py ADDED
@@ -0,0 +1,325 @@
+ """
+ Three-way model comparison experiment.
+
+ Models compared:
+ 1. QLoRA + RAG (the existing service)
+ 2. QLoRA only (RAG removed)
+ 3. Base + RAG (PEFT removed)
+
+ Metrics:
+ - Overfitting (In-Distribution vs Out-Distribution)
+ - Answer latency (elapsed_time, retrieval_time, generation_time)
+ - Token counts (total_tokens, prompt_tokens, completion_tokens)
+ """
+
+ import os
+ import sys
+ import time
+ import json
+ import logging
+ from typing import Dict, List, Any
+ from datetime import datetime
+ from pathlib import Path
+
+ # Put the project root on the import path
+ project_root = Path(__file__).parent.parent
+ sys.path.insert(0, str(project_root))
+
+ from src.utils.config import RAGConfig
+ from src.eval_dataset import EvalDataset
+
+ # Logging setup
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+
+ class ModelComparison:
+     """Runs the model comparison experiment."""
+
+     def __init__(self, config=None, output_dir: str = "./results"):
+         """Initialize paths, dataset, and pipeline registry."""
+         self.config = config or RAGConfig()
+         self.output_dir = Path(output_dir)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Timestamp for output file names
+         self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+         # Evaluation dataset
+         self.dataset = EvalDataset()
+
+         # Model pipelines
+         self.pipelines = {}
+
+         logger.info("✅ ModelComparison initialized")
+         logger.info(f"   Results directory: {self.output_dir}")
+
+     def load_models(self):
+         """Load the three pipelines."""
+         logger.info("\n" + "="*60)
+         logger.info("Loading models")
+         logger.info("="*60)
+
+         try:
+             # 1. QLoRA + RAG (existing service)
+             logger.info("\n[1/3] Loading QLoRA + RAG...")
+             from src.generator.generator_gguf import GGUFRAGPipeline
+             self.pipelines['qlora_rag'] = GGUFRAGPipeline(config=self.config)
+             logger.info("✅ QLoRA + RAG loaded")
+
+             # 2. QLoRA only (RAG removed)
+             logger.info("\n[2/3] Loading QLoRA only...")
+             from src.generator.generator_gguf_no_rag import GGUFNoRAGPipeline
+             self.pipelines['qlora_only'] = GGUFNoRAGPipeline(config=self.config)
+             logger.info("✅ QLoRA only loaded")
+
+             # 3. Base + RAG (PEFT removed)
+             logger.info("\n[3/3] Loading Base + RAG...")
+             from src.generator.generator_gguf_base import GGUFBaseRAGPipeline
+             self.pipelines['base_rag'] = GGUFBaseRAGPipeline(config=self.config)
+             logger.info("✅ Base + RAG loaded")
+
+             logger.info("\n" + "="*60)
+             logger.info(f"✅ Loaded {len(self.pipelines)} models")
+             logger.info("="*60 + "\n")
+
+         except Exception as e:
+             logger.error(f"❌ Model loading failed: {e}")
+             import traceback
+             traceback.print_exc()
+             raise
+
+     def run_single_query(
+         self,
+         model_name: str,
+         query: str,
+         query_info: Dict[str, Any]
+     ) -> Dict[str, Any]:
+         """Run one model on one query."""
+         pipeline = self.pipelines[model_name]
+
+         try:
+             start_time = time.time()
+             result = pipeline.generate_answer(query)
+             total_time = time.time() - start_time
+
+             # Normalize the result record
+             return {
+                 'model': model_name,
+                 'query': query,
+                 'category': query_info.get('category', 'unknown'),
+                 'expected_type': query_info.get('expected_type', 'unknown'),
+                 'answer': result['answer'],
+                 'used_retrieval': result.get('used_retrieval', False),
+                 'query_type': result.get('query_type', 'unknown'),
+                 'search_mode': result.get('search_mode', 'none'),
+                 'elapsed_time': total_time,
+                 'model_elapsed_time': result.get('elapsed_time', 0),
+                 'usage': result.get('usage', {}),
+                 'sources_count': len(result.get('sources', [])),
+                 'success': True,
+                 'error': None
+             }
+
+         except Exception as e:
+             logger.error(f"❌ Query failed [{model_name}]: {e}")
+             return {
+                 'model': model_name,
+                 'query': query,
+                 'category': query_info.get('category', 'unknown'),
+                 'expected_type': query_info.get('expected_type', 'unknown'),
+                 'answer': None,
+                 'used_retrieval': False,
+                 'query_type': 'error',
+                 'search_mode': 'none',
+                 'elapsed_time': 0,
+                 'model_elapsed_time': 0,
+                 'usage': {},
+                 'sources_count': 0,
+                 'success': False,
+                 'error': str(e)
+             }
+
+     def run_experiment(
+         self,
+         distribution: str = 'all',
+         save_results: bool = True
+     ) -> Dict[str, List[Dict[str, Any]]]:
+         """
+         Run the experiment.
+
+         Args:
+             distribution: 'in', 'out', or 'all'
+             save_results: whether to persist the results
+         """
+         logger.info("\n" + "="*60)
+         logger.info("Starting experiment")
+         logger.info("="*60)
+
+         # Prepare the dataset
+         if distribution == 'in':
+             queries_dict = {'in_distribution': self.dataset.get_in_distribution()}
+         elif distribution == 'out':
+             queries_dict = {'out_distribution': self.dataset.get_out_distribution()}
+         else:  # 'all'
+             queries_dict = self.dataset.get_all_queries()
+
+         # Result container
+         all_results = {
+             'metadata': {
+                 'timestamp': self.timestamp,
+                 'distribution': distribution,
+                 'models': list(self.pipelines.keys()),
+                 'total_queries': sum(len(v) for v in queries_dict.values())
+             },
+             'results': {}
+         }
+
+         # Run each distribution
+         for dist_type, queries in queries_dict.items():
+             logger.info(f"\n{'='*60}")
+             logger.info(f"{dist_type.upper()} run ({len(queries)} queries)")
+             logger.info(f"{'='*60}")
+
+             dist_results = []
+
+             # For each query
+             for i, query_info in enumerate(queries, 1):
+                 query = query_info['query']
+                 logger.info(f"\n[{i}/{len(queries)}] Query: {query}")
+
+                 # For each model
+                 for model_name in self.pipelines.keys():
+                     logger.info(f"  → running {model_name}...")
+
+                     result = self.run_single_query(model_name, query, query_info)
+                     dist_results.append(result)
+
+                     if result['success']:
+                         logger.info(f"  ✅ done ({result['elapsed_time']:.2f}s)")
+                     else:
+                         logger.warning(f"  ❌ failed: {result['error']}")
+
+             all_results['results'][dist_type] = dist_results
+
+         # Persist the results
+         if save_results:
+             self._save_results(all_results)
+
+         logger.info("\n" + "="*60)
+         logger.info("✅ Experiment finished")
+         logger.info("="*60 + "\n")
+
+         return all_results
+
+     def _save_results(self, results: Dict[str, Any]):
+         """Persist the results as JSON plus a text summary."""
+         # Save as JSON
+         output_file = self.output_dir / f"results_{self.timestamp}.json"
+
+         with open(output_file, 'w', encoding='utf-8') as f:
+             json.dump(results, f, ensure_ascii=False, indent=2)
+
+         logger.info(f"📁 Results saved: {output_file}")
+
+         # Save the summary statistics
+         summary_file = self.output_dir / f"summary_{self.timestamp}.txt"
+         self._save_summary(results, summary_file)
+
+         logger.info(f"📊 Summary saved: {summary_file}")
+
+     def _save_summary(self, results: Dict[str, Any], output_file: Path):
+         """Write per-model summary statistics."""
+         with open(output_file, 'w', encoding='utf-8') as f:
+             f.write("="*60 + "\n")
+             f.write("Experiment result summary\n")
+             f.write("="*60 + "\n\n")
+
+             # Metadata
+             metadata = results['metadata']
+             f.write(f"Timestamp: {metadata['timestamp']}\n")
+             f.write(f"Distribution: {metadata['distribution']}\n")
+             f.write(f"Models: {', '.join(metadata['models'])}\n")
+             f.write(f"Total queries: {metadata['total_queries']}\n\n")
+
+             # Per-distribution statistics
+             for dist_type, dist_results in results['results'].items():
+                 f.write(f"\n{'='*60}\n")
+                 f.write(f"{dist_type.upper()} results\n")
+                 f.write(f"{'='*60}\n\n")
+
+                 # Group by model
+                 model_stats = {}
+                 for result in dist_results:
+                     model = result['model']
+                     if model not in model_stats:
+                         model_stats[model] = []
+                     model_stats[model].append(result)
+
+                 # Per-model statistics
+                 for model, model_results in model_stats.items():
+                     f.write(f"\n[{model}]\n")
+
+                     # Success / failure
+                     success_count = sum(1 for r in model_results if r['success'])
+                     f.write(f"  Success: {success_count}/{len(model_results)}\n")
+
+                     # Average latency
+                     avg_time = sum(r['elapsed_time'] for r in model_results if r['success']) / max(success_count, 1)
+                     f.write(f"  Avg time: {avg_time:.3f}s\n")
+
+                     # Average tokens
+                     total_tokens = sum(r['usage'].get('total_tokens', 0) for r in model_results if r['success'])
+                     avg_tokens = total_tokens / max(success_count, 1)
+                     f.write(f"  Avg tokens: {avg_tokens:.1f}\n")
+
+                     # RAG usage rate
+                     rag_count = sum(1 for r in model_results if r['used_retrieval'])
+                     f.write(f"  RAG used: {rag_count}/{len(model_results)} ({rag_count/len(model_results)*100:.1f}%)\n")
+
+
+ def main():
+     """Entry point."""
+     logger.info("="*60)
+     logger.info("RFPilot model comparison experiment")
+     logger.info("="*60)
+
+     # Load the config
+     config = RAGConfig()
+
+     # Initialize the experiment
+     experiment = ModelComparison(config=config, output_dir="./experiments/results")
+
+     # Inspect the dataset
+     experiment.dataset.print_summary()
+     experiment.dataset.print_samples(n=3)
+
+     # Load the models
+     experiment.load_models()
+
+     # Run the experiment
+     # Option 1: the full experiment
+     results = experiment.run_experiment(distribution='all', save_results=True)
+
+     # Option 2: In-Distribution only
+     # results = experiment.run_experiment(distribution='in', save_results=True)
+
+     # Option 3: Out-Distribution only
+     # results = experiment.run_experiment(distribution='out', save_results=True)
+
+     logger.info("\n✅ All experiments finished!")
+     logger.info(f"   Results saved under: {experiment.output_dir}")
+
+
+ if __name__ == "__main__":
+     try:
+         main()
+     except KeyboardInterrupt:
+         logger.info("\n⚠️ Interrupted by user")
+     except Exception as e:
+         logger.error(f"\n❌ Experiment failed: {e}")
+         import traceback
+         traceback.print_exc()
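Since the saved `results_*.json` keeps the two splits side by side, the overfitting signal named in the module docstring can be read off directly. A sketch (locating the newest file the same way `app.py` does; the gap comparison itself is an assumption about how the results would be interpreted):

```python
import json
from pathlib import Path

# Pick the newest result file, the same way app.py locates it.
latest = sorted(Path("experiments/results").glob("results_*.json"))[-1]
results = json.loads(latest.read_text(encoding="utf-8"))["results"]

models = sorted({r["model"] for rs in results.values() for r in rs})
for model in models:
    for dist, rs in results.items():
        mine = [r for r in rs if r["model"] == model]
        ok = [r for r in mine if r["success"]]
        rate = len(ok) / max(len(mine), 1)
        avg_t = sum(r["elapsed_time"] for r in ok) / max(len(ok), 1)
        # A large in- vs out-of-distribution gap for a model hints at overfitting.
        print(f"{model:12s} {dist:18s} success={rate:.2f} avg_time={avg_t:.2f}s")
```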
src/create_eval_dataset.py ADDED
@@ -0,0 +1,247 @@
+ """
+ Evaluation dataset builder.
+
+ Builds an evaluation dataset with ground truth by pairing questions
+ and answers drawn from real RFP documents.
+
+ Usage:
+     python create_eval_dataset.py --input data/rag_chunks_final.csv --output data/eval_dataset.json
+ """
+
+ import json
+ import csv
+ import argparse
+ from pathlib import Path
+ from typing import List, Dict, Any
+
+
+ class EvalDatasetCreator:
+     """Builds the evaluation dataset."""
+
+     def __init__(self):
+         self.dataset = {
+             "metadata": {
+                 "version": "1.0",
+                 "description": "RFPilot 평가 데이터셋",
+                 "created_by": "manual_annotation"
+             },
+             "in_distribution": [],
+             "out_distribution": []
+         }
+
+     def add_in_distribution_sample(
+         self,
+         query: str,
+         expected_answer: str,
+         category: str,
+         source_doc: str = None,
+         metadata: Dict[str, Any] = None
+     ):
+         """Add an in-distribution sample."""
+         sample = {
+             "query": query,
+             "expected_answer": expected_answer,
+             "category": category,
+             "expected_type": "document",
+             "source_doc": source_doc,
+             "metadata": metadata or {}
+         }
+         self.dataset["in_distribution"].append(sample)
+
+     def add_out_distribution_sample(
+         self,
+         query: str,
+         expected_answer: str,
+         category: str,
+         metadata: Dict[str, Any] = None
+     ):
+         """Add an out-of-distribution sample."""
+         sample = {
+             "query": query,
+             "expected_answer": expected_answer,
+             "category": category,
+             "expected_type": "out_of_scope",
+             "metadata": metadata or {}
+         }
+         self.dataset["out_distribution"].append(sample)
+
+     def create_template_dataset(self):
+         """Create a template dataset (for manual annotation)."""
+         print("📝 Creating template dataset...")
+
+         # In-distribution templates
+         in_dist_templates = [
+             {
+                 "query": "사업 제안서 제출 마감일은 언제인가요?",
+                 "expected_answer": "2024년 3월 15일까지입니다.",  # extracted from the actual document
+                 "category": "deadline",
+                 "source_doc": "RFP_2024_001.hwp",
+                 "metadata": {"difficulty": "easy"}
+             },
+             {
+                 "query": "제안 요청서의 제출 서류는 무엇인가요?",
+                 "expected_answer": "기술제안서, 가격제안서, 사업자등록증, 회사소개서가 필요합니다.",
+                 "category": "requirements",
+                 "source_doc": "RFP_2024_001.hwp",
+                 "metadata": {"difficulty": "medium"}
+             },
+             {
+                 "query": "사업 예산 규모는 얼마인가요?",
+                 "expected_answer": "총 5억원입니다.",
+                 "category": "budget",
+                 "source_doc": "RFP_2024_002.hwp",
+                 "metadata": {"difficulty": "easy"}
+             },
+         ]
+
+         # Out-of-distribution templates
+         out_dist_templates = [
+             {
+                 "query": "한국의 수도는 어디인가요?",
+                 "expected_answer": "서울입니다.",
+                 "category": "general_knowledge",
+                 "metadata": {"difficulty": "easy"}
+             },
+             {
+                 "query": "파이썬에서 리스트와 튜플의 차이는 무엇인가요?",
+                 "expected_answer": "리스트는 가변(mutable)이고, 튜플은 불변(immutable)입니다.",
+                 "category": "programming",
+                 "metadata": {"difficulty": "medium"}
+             },
+         ]
+
+         # Add them to the dataset
+         for sample in in_dist_templates:
+             self.add_in_distribution_sample(**sample)
+
+         for sample in out_dist_templates:
+             self.add_out_distribution_sample(**sample)
+
+         print("✅ Template created")
+         print(f"   - In-Distribution: {len(in_dist_templates)} samples")
+         print(f"   - Out-Distribution: {len(out_dist_templates)} samples")
+         print("\n⚠️ Edit this template and fill in real data!")
+
+     def load_from_csv(self, csv_path: str):
+         """Load the dataset from a CSV file."""
+         print(f"📥 Loading CSV: {csv_path}")
+
+         with open(csv_path, 'r', encoding='utf-8') as f:
+             reader = csv.DictReader(f)
+             for row in reader:
+                 distribution = row.get('distribution', 'in_distribution')
+
+                 if distribution == 'in_distribution':
+                     self.add_in_distribution_sample(
+                         query=row['query'],
+                         expected_answer=row['expected_answer'],
+                         category=row['category'],
+                         source_doc=row.get('source_doc'),
+                         metadata=json.loads(row.get('metadata', '{}'))
+                     )
+                 else:
+                     self.add_out_distribution_sample(
+                         query=row['query'],
+                         expected_answer=row['expected_answer'],
+                         category=row['category'],
+                         metadata=json.loads(row.get('metadata', '{}'))
+                     )
+
+         print("✅ CSV loaded")
+
+     def save_json(self, output_path: str):
+         """Save as JSON."""
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'w', encoding='utf-8') as f:
+             json.dump(self.dataset, f, ensure_ascii=False, indent=2)
+
+         print(f"💾 Saved: {output_path}")
+
+     def save_csv_template(self, output_path: str):
+         """Save a CSV template for manual annotation."""
+         output_path = Path(output_path)
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         with open(output_path, 'w', encoding='utf-8', newline='') as f:
+             writer = csv.DictWriter(f, fieldnames=[
+                 'distribution', 'query', 'expected_answer',
+                 'category', 'source_doc', 'metadata'
+             ])
+             writer.writeheader()
+
+             # In-distribution example
+             writer.writerow({
+                 'distribution': 'in_distribution',
+                 'query': '사업 제안서 제출 마감일은 언제인가요?',
+                 'expected_answer': '2024년 3월 15일까지입니다.',
+                 'category': 'deadline',
+                 'source_doc': 'RFP_2024_001.hwp',
+                 'metadata': '{"difficulty": "easy"}'
+             })
+
+             # Out-of-distribution example
+             writer.writerow({
+                 'distribution': 'out_distribution',
+                 'query': '한국의 수도는 어디인가요?',
+                 'expected_answer': '서울입니다.',
+                 'category': 'general_knowledge',
+                 'source_doc': '',
+                 'metadata': '{"difficulty": "easy"}'
+             })
+
+         print(f"📄 CSV template saved: {output_path}")
+         print("   → edit this file and fill in real data!")
+
+     def print_summary(self):
+         """Print a dataset summary."""
+         print("\n" + "="*60)
+         print("Dataset summary")
+         print("="*60)
+         print(f"In-Distribution: {len(self.dataset['in_distribution'])} samples")
+         print(f"Out-Distribution: {len(self.dataset['out_distribution'])} samples")
+         print(f"Total samples: {len(self.dataset['in_distribution']) + len(self.dataset['out_distribution'])}")
+         print("="*60 + "\n")
+
+
+ def main():
+     parser = argparse.ArgumentParser(description='Build the evaluation dataset')
+     parser.add_argument('--mode', choices=['template', 'csv'], default='template',
+                         help='mode: template (create a template) or csv (load from CSV)')
+     parser.add_argument('--input', type=str, help='input CSV file path')
+     parser.add_argument('--output', type=str, default='data/eval_dataset.json',
+                         help='output JSON file path')
+     parser.add_argument('--csv-template', type=str, default='data/eval_template.csv',
+                         help='where to save the CSV template')
+
+     args = parser.parse_args()
+
+     creator = EvalDatasetCreator()
+
+     if args.mode == 'template':
+         print("📝 Template mode")
+         creator.create_template_dataset()
+         creator.save_json(args.output)
+         creator.save_csv_template(args.csv_template)
+
+     elif args.mode == 'csv':
+         if not args.input:
+             print("❌ CSV mode requires the --input option.")
+             return
+
+         print("📥 CSV mode")
+         creator.load_from_csv(args.input)
+         creator.save_json(args.output)
+
+     creator.print_summary()
+
+     print("\n✅ Done!")
+     print("\nNext steps:")
+     print(f"1. Open {args.csv_template} and fill in real data")
+     print(f"2. python create_eval_dataset.py --mode csv --input {args.csv_template} --output {args.output}")
+     print(f"3. Use the generated {args.output} in the experiment")
+
+
+ if __name__ == "__main__":
+     main()
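The same flow is available programmatically; a short sketch mirroring the CLI's template mode, assuming it is run from the project root (the relative `data/` paths depend on that):

```python
from src.create_eval_dataset import EvalDatasetCreator

creator = EvalDatasetCreator()
creator.create_template_dataset()                    # fill in the built-in examples
creator.save_json("data/eval_dataset.json")          # what the experiment reads
creator.save_csv_template("data/eval_template.csv")  # for manual annotation
creator.print_summary()
```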
src/eval_dataset.py ADDED
@@ -0,0 +1,33 @@
+ import json
+ from pathlib import Path
+ from typing import Dict, List, Any
+
+ class EvalDataset:
+     def __init__(self, dataset_path: str = "data/eval_dataset.json"):
+         self.dataset_path = Path(dataset_path)
+         self.data = self._load_dataset()
+
+     def _load_dataset(self) -> Dict[str, Any]:
+         with open(self.dataset_path, 'r', encoding='utf-8') as f:
+             return json.load(f)
+
+     def get_in_distribution(self) -> List[Dict[str, Any]]:
+         return self.data.get('in_distribution', [])
+
+     def get_out_distribution(self) -> List[Dict[str, Any]]:
+         return self.data.get('out_distribution', [])
+
+     def get_all_queries(self) -> Dict[str, List[Dict[str, Any]]]:
+         return {
+             'in_distribution': self.get_in_distribution(),
+             'out_distribution': self.get_out_distribution()
+         }
+
+     def print_summary(self):
+         print(f"In-Distribution: {len(self.get_in_distribution())} samples")
+         print(f"Out-Distribution: {len(self.get_out_distribution())} samples")
+
+     def print_samples(self, n: int = 3):
+         print("\n[In-Distribution samples]")
+         for item in self.get_in_distribution()[:n]:
+             print(f"  - {item['query']}")
src/generator/generator_gguf.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """
        Initialize the generator.

        Args:
            model_path: Path to the GGUF model file
            n_gpu_layers: Layers to offload to the GPU (0 = CPU only, 35 = fully on GPU)
            n_ctx: Maximum context length
            n_threads: Number of CPU threads
            max_new_tokens: Maximum number of generated tokens
            temperature: Sampling temperature (0.0-1.0)
            top_p: Nucleus sampling parameter
            system_prompt: System prompt (kept in Korean; the model answers in Korean)
        """
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized")

    def load_model(self) -> None:
        """
        Load the GGUF model.

        Logic:
        1. Check USE_MODEL_HUB
        2-A. True  -> download from the Hugging Face Hub
        2-B. False -> use a local file
        3. Load the model
        """

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config (defaults to True)
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                model_hub_repo = getattr(self.config, 'MODEL_HUB_REPO', 'beomi/Llama-3-Open-Ko-8B-gguf')
                model_hub_filename = getattr(self.config, 'MODEL_HUB_FILENAME', 'ggml-model-Q4_K_M.gguf')
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading from the Model Hub: {model_hub_repo}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=model_hub_repo,
                    filename=model_hub_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False  # copy the file instead of symlinking
                )

                logger.info(f"โœ… Download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path  # path passed to the constructor

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,  # enable debug logging
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… GGUF model loaded!")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")
                logger.warning("   Possibly out of memory. Try lowering n_gpu_layers.")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Simple prompt formatting for the GGUF model.

        Uses a plain-text Korean template instead of Llama-3 special tokens.
        """
        # Pick the system prompt
        if system_prompt is None:
            system_prompt = self.system_prompt

        # Include context if provided
        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        # Simple Korean template (no special tokens)
        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt
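
    # --- Illustration (editor's sketch, not in the original file; values are hypothetical) ---
    # format_prompt("์‚ฌ์—… ๊ธฐ๊ฐ„์€?", context=None) returns roughly:
    #
    #     ### ์‹œ์Šคํ…œ
    #     ๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.
    #
    #     ### ์‚ฌ์šฉ์ž
    #     ์‚ฌ์—… ๊ธฐ๊ฐ„์€?
    #
    #     ### ๋‹ต๋ณ€
    #
    # The trailing "### ๋‹ต๋ณ€" header acts as the generation anchor, and "###"
    # appears in the stop list of generate() below, so decoding halts before
    # the model can open a new section.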

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """
        Generate a response for a formatted prompt.

        Args:
            prompt: Formatted prompt
            max_new_tokens: Maximum number of generated tokens
            temperature: Sampling temperature
            top_p: Nucleus sampling

        Returns:
            Generated response text

        Raises:
            RuntimeError: If the model is not loaded
        """
        # Make sure the model is loaded
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        # Resolve parameters
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            # Generate
            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,  # do not echo the prompt
                stop=[
                    # Section delimiters
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    # Block meta text
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    # Block question patterns (prevents generating questions after the answer)
                    "\n\n",  # paragraph break
                    "?",     # question mark
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"  # Korean interrogative endings
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            # Extract the response
            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """
        Generate a response to a question (convenience method).

        Args:
            question: User question
            context: Optional context
            system_prompt: Optional system prompt
            **kwargs: Extra parameters forwarded to generate()

        Returns:
            Generated response
        """
        # Format the prompt
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        # Generate the response
        response = self.generate(prompt, **kwargs)

        return response


class GGUFRAGPipeline:
    """
    GGUF generator + RAG pipeline.

    Provides an interface compatible with chatbot_app.py.
    """

    def __init__(
        self,
        config=None,
        model: str = None,  # for compatibility (unused)
        top_k: int = None,
        # GPU settings (optional, override the config)
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None,
        search_mode: str = None,
        alpha: float = None
    ):
        """
        Initialize.

        Args:
            config: RAGConfig object
            model: Model name (unused, for compatibility)
            top_k: Default number of documents to retrieve
            n_gpu_layers: Number of GPU layers (config override)
            n_ctx: Context length (config override)
            n_threads: Number of CPU threads (config override)
            max_new_tokens: Maximum generated tokens (config override)
            temperature: Sampling temperature (config override)
            top_p: Nucleus sampling (config override)
            search_mode: Retrieval mode
            alpha: Embedding weight
        """
        self.config = config or RAGConfig()

        # Defaults from the config (with fallbacks)
        self.top_k = top_k or getattr(self.config, 'DEFAULT_TOP_K', 10)

        # Retrieval settings
        self.search_mode = search_mode or getattr(self.config, 'DEFAULT_SEARCH_MODE', 'hybrid_rerank')
        self.alpha = alpha if alpha is not None else getattr(self.config, 'DEFAULT_ALPHA', 0.5)

        # Initialize the retriever (uses RAGRetriever)
        logger.info("Initializing RAGRetriever...")
        from src.retriever.retriever import RAGRetriever
        self.retriever = RAGRetriever(config=self.config)

        # GGUF settings (explicit parameters override the config, which overrides defaults)
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path (fallback)
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt (fallback)
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator...")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")
        logger.info(f"   Threads: {gguf_n_threads}")
        logger.info(f"   Model path: {gguf_model_path}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model (takes a while)
        logger.info("Loading GGUF model...")
        self.generator.load_model()

        # Chat history
        self.chat_history: List[Dict] = []

        # Last retrieval results (used to return sources)
        self._last_retrieved_docs = []

        logger.info("โœ… GGUFRAGPipeline initialized")
        logger.info(f"   - search mode: {self.search_mode}")
        logger.info(f"   - default top_k: {self.top_k}")

    def _retrieve_and_format(self, query: str) -> str:
        """Run retrieval and format the context."""
        # Retrieve documents according to the search mode (RAGRetriever methods)
        if self.search_mode == "embedding":
            docs = self.retriever.search(query, top_k=self.top_k)
        elif self.search_mode == "embedding_rerank":
            docs = self.retriever.search_with_rerank(query, top_k=self.top_k)
        elif self.search_mode == "hybrid":
            docs = self.retriever.hybrid_search(
                query, top_k=self.top_k, alpha=self.alpha
            )
        elif self.search_mode == "hybrid_rerank":
            docs = self.retriever.hybrid_search_with_rerank(
                query, top_k=self.top_k, alpha=self.alpha
            )
        else:
            docs = self.retriever.search(query, top_k=self.top_k)

        # Remember the last retrieval results
        self._last_retrieved_docs = docs

        # Format the context
        return self._format_context(docs)

    def _format_context(self, retrieved_docs: list) -> str:
        """
        Convert retrieved documents into a context string.

        Truncates automatically if the context grows too long (token-limit guard).
        """
        if not retrieved_docs:
            return "๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        context_parts = []
        max_context_chars = 8000  # roughly 2000 tokens, with headroom

        current_length = 0
        for i, doc in enumerate(retrieved_docs, 1):
            doc_text = f"[๋ฌธ์„œ {i}]\n{doc['content']}\n"
            doc_length = len(doc_text)

            # Check the context length
            if current_length + doc_length > max_context_chars:
                logger.warning(f"โš ๏ธ Context length limit: using only {i-1} documents (max {max_context_chars} chars)")
                break

            context_parts.append(doc_text)
            current_length += doc_length

        return "\n".join(context_parts)
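
    # Worked example (editor's illustration, hypothetical numbers): with
    # max_context_chars=8000 and three retrieved chunks of ~3000 characters
    # each, the third chunk would push the total to ~9000, so only the first
    # two are kept. At the roughly 4 chars/token implied by the comment above,
    # this leaves headroom inside the model's n_ctx for the system prompt,
    # the question, and the generated answer.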

    def _format_sources(self, retrieved_docs: list) -> list:
        """Convert retrieved documents into the sources format."""
        sources = []
        for doc in retrieved_docs:
            source_info = {
                'content': doc['content'],
                'metadata': doc['metadata'],
                'filename': doc.get('filename', 'N/A'),
                'organization': doc.get('organization', 'N/A')
            }

            # The score field differs by search mode
            if 'rerank_score' in doc:
                source_info['score'] = doc['rerank_score']
                source_info['score_type'] = 'rerank'
            elif 'hybrid_score' in doc:
                source_info['score'] = doc['hybrid_score']
                source_info['score_type'] = 'hybrid'
            elif 'relevance_score' in doc:
                source_info['score'] = doc['relevance_score']
                source_info['score_type'] = 'embedding'
            else:
                source_info['score'] = 0
                source_info['score_type'] = 'unknown'

            sources.append(source_info)

        return sources

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        # Rough word-count-based estimate
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """
        Generate an answer (main method, compatible with chatbot_app.py).

        Args:
            query: Question
            top_k: Number of documents to retrieve
            search_mode: Retrieval mode
            alpha: Embedding weight

        Returns:
            dict: answer, sources, search_mode, usage, elapsed_time, used_retrieval
        """
        try:
            start_time = time.time()

            # Apply parameters (before retrieval runs)
            if top_k is not None:
                self.top_k = top_k
            if search_mode is not None:
                self.search_mode = search_mode
            if alpha is not None:
                self.alpha = alpha

            # ===== 1. Decide whether to retrieve, via the router =====
            # (a fresh router per call; the base/no-RAG variants cache it on self)
            router = QueryRouter()
            classification = router.classify(query)
            query_type = classification['type']  # 'greeting'/'thanks'/'document'/'out_of_scope'

            logger.info(f"๐Ÿ“ Classified as: {query_type} "
                        f"(confidence: {classification['confidence']:.2f})")

            # 2. Handle by type (the four router types above cover all cases,
            #    so context/system_prompt are always bound before step 3)
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                # Skip retrieval
                context = None
                used_retrieval = False
                self._last_retrieved_docs = []

                # Dynamic prompt selection (GGUF variant)
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
                logger.info(f"โญ๏ธ RAG skipped: {query_type}")

            elif query_type == 'document':
                # Run RAG
                context = self._retrieve_and_format(query)
                used_retrieval = True

                # Dynamic prompt (GGUF variant, context included)
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")
                logger.info(f"๐Ÿ” RAG performed: {len(self._last_retrieved_docs)} documents")

            # 3. Generate the answer (passing system_prompt)
            answer = self.generator.chat(
                question=query,
                context=context,
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results (same shape as RAGPipeline)
            return {
                'answer': answer,
                'sources': self._format_sources(self._last_retrieved_docs),
                'used_retrieval': used_retrieval,
                'query_type': query_type,
                'search_mode': self.search_mode if used_retrieval else 'direct',
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()

    def set_search_config(
        self,
        search_mode: str = None,
        top_k: int = None,
        alpha: float = None
    ):
        """Change retrieval settings."""
        if search_mode is not None:
            self.search_mode = search_mode
        if top_k is not None:
            self.top_k = top_k
        if alpha is not None:
            self.alpha = alpha

        logger.info(
            f"๐Ÿ”ง Retrieval settings changed: mode={self.search_mode}, "
            f"top_k={self.top_k}, alpha={self.alpha}"
        )
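
A minimal usage sketch of the pipeline above (editor's sketch, not part of the commit: it assumes this file lives at src/generator/generator_gguf.py, as the base variant below references it, plus a populated Chroma store and valid OpenAI credentials in RAGConfig; the query is hypothetical):

from src.generator.generator_gguf import GGUFRAGPipeline

pipeline = GGUFRAGPipeline(n_gpu_layers=0)  # CPU-only override of the config default
result = pipeline.generate_answer("์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?")

print(result['answer'])
print(result['query_type'], result['search_mode'], f"{result['elapsed_time']:.1f}s")
for source in result['sources'][:3]:
    print(source['filename'], source['score_type'], source['score'])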
src/generator/generator_gguf_base.py ADDED
@@ -0,0 +1,516 @@
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """Initialize the generator."""
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized (base model)")

    def load_model(self) -> None:
        """
        Load the GGUF model.

        Uses the base model: reads BASE_MODEL_HUB_REPO from the config.
        """

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                # Base-model info from the config
                base_model_repo = getattr(
                    self.config,
                    'BASE_MODEL_HUB_REPO',
                    'beomi/Llama-3-Open-Ko-8B-gguf'
                )
                base_model_filename = getattr(
                    self.config,
                    'BASE_MODEL_HUB_FILENAME',
                    'ggml-model-Q4_K_M.gguf'
                )
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading base model: {base_model_repo}")
                logger.info(f"   Filename: {base_model_filename}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=base_model_repo,
                    filename=base_model_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False
                )

                logger.info(f"โœ… Base model download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local base model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading base GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… Base GGUF model loaded!")
            logger.info(f"   - model: {base_model_repo if use_model_hub else 'local'}")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """Simple prompt formatting for the GGUF model."""
        if system_prompt is None:
            system_prompt = self.system_prompt

        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """Generate a response for a formatted prompt."""
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,
                stop=[
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    "\n\n",
                    "?",
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """Generate a response to a question."""
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        response = self.generate(prompt, **kwargs)

        return response


class GGUFBaseRAGPipeline:
    """
    Base model + RAG pipeline.

    - Uses the base model (beomi/Llama-3-Open-Ko-8B)
    - Keeps RAG
    - Same behavior as the existing generator_gguf.py
    """

    def __init__(
        self,
        config=None,
        model: str = None,
        top_k: int = None,
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None,
        search_mode: str = None,
        alpha: float = None
    ):
        """Initialize."""
        self.config = config or RAGConfig()

        # Retrieval settings
        self.top_k = top_k or getattr(self.config, 'DEFAULT_TOP_K', 10)
        self.search_mode = search_mode or getattr(self.config, 'DEFAULT_SEARCH_MODE', 'hybrid_rerank')
        self.alpha = alpha if alpha is not None else getattr(self.config, 'DEFAULT_ALPHA', 0.5)

        # Initialize the retriever
        logger.info("Initializing RAGRetriever...")
        from src.retriever.retriever import RAGRetriever
        self.retriever = RAGRetriever(config=self.config)

        # GGUF settings
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path (unused here; the model is downloaded from the Hub)
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator... (base model)")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model
        logger.info("Loading base GGUF model...")
        self.generator.load_model()

        # Router
        self.router = QueryRouter()

        # Chat history
        self.chat_history: List[Dict] = []

        # Last retrieval results
        self._last_retrieved_docs = []

        logger.info("โœ… GGUFBaseRAGPipeline initialized")
        logger.info(f"   - search mode: {self.search_mode}")
        logger.info(f"   - default top_k: {self.top_k}")

    def _retrieve_and_format(self, query: str) -> str:
        """Run retrieval and format the context."""
        # Retrieve documents according to the search mode
        if self.search_mode == "embedding":
            docs = self.retriever.search(query, top_k=self.top_k)
        elif self.search_mode == "embedding_rerank":
            docs = self.retriever.search_with_rerank(query, top_k=self.top_k)
        elif self.search_mode == "hybrid":
            docs = self.retriever.hybrid_search(
                query, top_k=self.top_k, alpha=self.alpha
            )
        elif self.search_mode == "hybrid_rerank":
            docs = self.retriever.hybrid_search_with_rerank(
                query, top_k=self.top_k, alpha=self.alpha
            )
        else:
            docs = self.retriever.search(query, top_k=self.top_k)

        # Remember the last retrieval results
        self._last_retrieved_docs = docs

        # Format the context
        return self._format_context(docs)

    def _format_context(self, retrieved_docs: list) -> str:
        """Convert retrieved documents into a context string."""
        if not retrieved_docs:
            return "๊ด€๋ จ ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        context_parts = []
        max_context_chars = 8000

        current_length = 0
        for i, doc in enumerate(retrieved_docs, 1):
            doc_text = f"[๋ฌธ์„œ {i}]\n{doc['content']}\n"
            doc_length = len(doc_text)

            if current_length + doc_length > max_context_chars:
                logger.warning(f"โš ๏ธ Context length limit: using only {i-1} documents")
                break

            context_parts.append(doc_text)
            current_length += doc_length

        return "\n".join(context_parts)

    def _format_sources(self, retrieved_docs: list) -> list:
        """Convert retrieved documents into the sources format."""
        sources = []
        for doc in retrieved_docs:
            source_info = {
                'content': doc['content'],
                'metadata': doc['metadata'],
                'filename': doc.get('filename', 'N/A'),
                'organization': doc.get('organization', 'N/A')
            }

            if 'rerank_score' in doc:
                source_info['score'] = doc['rerank_score']
                source_info['score_type'] = 'rerank'
            elif 'hybrid_score' in doc:
                source_info['score'] = doc['hybrid_score']
                source_info['score_type'] = 'hybrid'
            elif 'relevance_score' in doc:
                source_info['score'] = doc['relevance_score']
                source_info['score_type'] = 'embedding'
            else:
                source_info['score'] = 0
                source_info['score_type'] = 'unknown'

            sources.append(source_info)

        return sources

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """Generate an answer (base model + RAG)."""
        try:
            start_time = time.time()

            # Apply parameters
            if top_k is not None:
                self.top_k = top_k
            if search_mode is not None:
                self.search_mode = search_mode
            if alpha is not None:
                self.alpha = alpha

            # Decide whether to retrieve, via the router
            classification = self.router.classify(query)
            query_type = classification['type']

            logger.info(f"๐Ÿ“ Classified as: {query_type} (confidence: {classification['confidence']:.2f})")

            # Handle by type
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                # Skip retrieval
                context = None
                used_retrieval = False
                self._last_retrieved_docs = []

                # Dynamic prompt
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
                logger.info(f"โญ๏ธ RAG skipped: {query_type}")

            elif query_type == 'document':
                # Run RAG
                context = self._retrieve_and_format(query)
                used_retrieval = True

                # Dynamic prompt
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")
                logger.info(f"๐Ÿ” RAG performed: {len(self._last_retrieved_docs)} documents")

            # Generate the answer
            answer = self.generator.chat(
                question=query,
                context=context,
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results
            return {
                'answer': answer,
                'sources': self._format_sources(self._last_retrieved_docs),
                'used_retrieval': used_retrieval,
                'query_type': query_type,
                'search_mode': self.search_mode if used_retrieval else 'direct',
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()

    def set_search_config(
        self,
        search_mode: str = None,
        top_k: int = None,
        alpha: float = None
    ):
        """Change retrieval settings."""
        if search_mode is not None:
            self.search_mode = search_mode
        if top_k is not None:
            self.top_k = top_k
        if alpha is not None:
            self.alpha = alpha

        logger.info(
            f"๐Ÿ”ง Retrieval settings changed: mode={self.search_mode}, "
            f"top_k={self.top_k}, alpha={self.alpha}"
        )
src/generator/generator_gguf_no_rag.py ADDED
@@ -0,0 +1,396 @@
from llama_cpp import Llama
from typing import Optional, Dict, Any, List
import logging
import time
import os

from src.utils.config import RAGConfig
from src.router.query_router import QueryRouter
from src.prompts.dynamic_prompts import PromptManager

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GGUFGenerator:
    """
    GGUF-based Llama-3 generator.

    Loads a GGUF-format model with llama.cpp and performs
    question answering about bid (RFP) documents.
    """

    def __init__(
        self,
        model_path: str,
        n_gpu_layers: int = 0,
        n_ctx: int = 8192,
        n_threads: int = 8,
        config=None,
        max_new_tokens: int = 256,
        temperature: float = 0.7,
        top_p: float = 0.9,
        system_prompt: str = "๋‹น์‹ ์€ RFP(์ œ์•ˆ์š”์ฒญ์„œ) ๋ถ„์„ ๋ฐ ์š”์•ฝ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค."
    ):
        """Initialize the generator."""
        self.config = config or RAGConfig()
        self.model_path = model_path
        self.n_gpu_layers = n_gpu_layers
        self.n_ctx = n_ctx
        self.n_threads = n_threads
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.system_prompt = system_prompt

        # Model (loaded lazily)
        self.model = None

        logger.info("GGUFGenerator initialized")

    def load_model(self) -> None:
        """Load the GGUF model."""

        # Avoid loading twice
        if self.model is not None:
            logger.info("Model is already loaded.")
            return

        try:
            # Read USE_MODEL_HUB from the config
            use_model_hub = getattr(self.config, 'USE_MODEL_HUB', True)

            # Resolve the model path depending on whether the Hub is used
            if use_model_hub:
                # === Download from the Model Hub ===
                model_hub_repo = getattr(self.config, 'MODEL_HUB_REPO', 'beomi/Llama-3-Open-Ko-8B-gguf')
                model_hub_filename = getattr(self.config, 'MODEL_HUB_FILENAME', 'ggml-model-Q4_K_M.gguf')
                model_cache_dir = getattr(self.config, 'MODEL_CACHE_DIR', '.cache/models')

                logger.info(f"๐Ÿ“ฅ Downloading from the Model Hub: {model_hub_repo}")

                from huggingface_hub import hf_hub_download

                model_path = hf_hub_download(
                    repo_id=model_hub_repo,
                    filename=model_hub_filename,
                    cache_dir=model_cache_dir,
                    local_dir=model_cache_dir,
                    local_dir_use_symlinks=False
                )

                logger.info(f"โœ… Download complete: {model_path}")

            else:
                # === Use a local file ===
                model_path = self.model_path

                if not os.path.exists(model_path):
                    raise FileNotFoundError(
                        f"โŒ Local model file not found: {model_path}\n"
                        f"   Set USE_MODEL_HUB=true or provide the model file."
                    )

                logger.info(f"๐Ÿ“‚ Using local model: {model_path}")

            # === Common: load the model ===
            logger.info("๐Ÿš€ Loading GGUF model...")
            logger.info(f"   GPU layers: {self.n_gpu_layers}")
            logger.info(f"   Context: {self.n_ctx}")

            self.model = Llama(
                model_path=model_path,
                n_gpu_layers=self.n_gpu_layers,
                n_ctx=self.n_ctx,
                n_threads=self.n_threads,
                verbose=True,
            )

            # Check the n_ctx that was actually applied
            actual_n_ctx = self.model.n_ctx()
            logger.info("โœ… GGUF model loaded!")
            logger.info(f"   - requested n_ctx: {self.n_ctx}")
            logger.info(f"   - actual n_ctx: {actual_n_ctx}")

            if actual_n_ctx < self.n_ctx:
                logger.warning(f"โš ๏ธ n_ctx is smaller than requested: {actual_n_ctx} < {self.n_ctx}")
                logger.warning("   Possibly out of memory. Try lowering n_gpu_layers.")

        except FileNotFoundError as e:
            logger.error(f"โŒ Model file not found: {e}")
            raise
        except Exception as e:
            logger.error(f"โŒ Model load failed: {e}")
            raise RuntimeError(f"Error while loading model: {e}")

    def format_prompt(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None
    ) -> str:
        """Simple prompt formatting for the GGUF model."""
        # Pick the system prompt
        if system_prompt is None:
            system_prompt = self.system_prompt

        # Include context if provided
        if context is not None:
            user_message = f"์ฐธ๊ณ  ๋ฌธ์„œ:\n{context}\n\n์งˆ๋ฌธ: {question}"
        else:
            user_message = question

        # Simple Korean template
        formatted_prompt = f"""### ์‹œ์Šคํ…œ
{system_prompt}

### ์‚ฌ์šฉ์ž
{user_message}

### ๋‹ต๋ณ€
"""

        return formatted_prompt

    def generate(
        self,
        prompt: str,
        max_new_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
    ) -> str:
        """Generate a response for a formatted prompt."""
        # Make sure the model is loaded
        if self.model is None:
            raise RuntimeError(
                "Model is not loaded. Call load_model() first."
            )

        # Resolve parameters
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if temperature is None:
            temperature = self.temperature
        if top_p is None:
            top_p = self.top_p

        try:
            logger.info(f"๐Ÿ”„ Generation started (max_tokens={max_new_tokens}, temp={temperature})")
            start_time = time.time()

            # Generate
            output = self.model(
                prompt,
                max_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                echo=False,
                stop=[
                    "###", "\n\n###",
                    "### ์‚ฌ์šฉ์ž", "\n์‚ฌ์šฉ์ž:",
                    "</s>",
                    "ํ•œ๊ตญ์–ด ๋‹ต๋ณ€", "ํ•œ๊ตญ์–ด๋กœ ๋‹ต๋ณ€", "์ง€์นจ:",
                    "๋ฌธ์žฅ", "(๋ฌธ์žฅ",
                    "\n\n",
                    "?",
                    "์š”?", "๊นŒ?", "๋‚˜์š”?", "์Šต๋‹ˆ๊นŒ?"
                ],
            )

            elapsed = time.time() - start_time
            logger.info(f"โœ… Generation finished: {elapsed:.2f}s")

            # Extract the response
            response = output['choices'][0]['text'].strip()

            logger.info(f"๐Ÿ“ Response length: {len(response)} characters")
            return response

        except Exception as e:
            logger.error(f"โŒ Error during generation: {e}")
            raise RuntimeError(f"Text generation failed: {e}")

    def chat(
        self,
        question: str,
        context: Optional[str] = None,
        system_prompt: Optional[str] = None,
        **kwargs
    ) -> str:
        """Generate a response to a question (convenience method)."""
        # Format the prompt
        prompt = self.format_prompt(
            question=question,
            context=context,
            system_prompt=system_prompt
        )

        # Generate the response
        response = self.generate(prompt, **kwargs)

        return response


class GGUFNoRAGPipeline:
    """
    QLoRA-model-only pipeline (RAG removed).

    - Retriever removed entirely
    - Router kept (to handle greeting/thanks)
    - Measures pure model performance
    """

    def __init__(
        self,
        config=None,
        model: str = None,
        top_k: int = None,
        n_gpu_layers: int = None,
        n_ctx: int = None,
        n_threads: int = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None
    ):
        """Initialize."""
        self.config = config or RAGConfig()

        # GGUF settings
        gguf_n_gpu_layers = n_gpu_layers if n_gpu_layers is not None else getattr(self.config, 'GGUF_N_GPU_LAYERS', 35)
        gguf_n_ctx = n_ctx if n_ctx is not None else getattr(self.config, 'GGUF_N_CTX', 2048)
        gguf_n_threads = n_threads if n_threads is not None else getattr(self.config, 'GGUF_N_THREADS', 4)
        gguf_max_new_tokens = max_new_tokens if max_new_tokens is not None else getattr(self.config, 'GGUF_MAX_NEW_TOKENS', 512)
        gguf_temperature = temperature if temperature is not None else getattr(self.config, 'GGUF_TEMPERATURE', 0.7)
        gguf_top_p = top_p if top_p is not None else getattr(self.config, 'GGUF_TOP_P', 0.9)

        # Model path
        gguf_model_path = getattr(self.config, 'GGUF_MODEL_PATH', '.cache/models/llama-3-ko-8b.gguf')

        # System prompt
        system_prompt = getattr(self.config, 'SYSTEM_PROMPT', '๋‹น์‹ ์€ ํ•œ๊ตญ ๊ณต๊ณต๊ธฐ๊ด€ ์‚ฌ์—…์ œ์•ˆ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.')

        # Initialize GGUFGenerator
        logger.info("Initializing GGUFGenerator... (no RAG)")
        logger.info(f"   GPU layers: {gguf_n_gpu_layers}")
        logger.info(f"   Context: {gguf_n_ctx}")
        logger.info(f"   Threads: {gguf_n_threads}")

        self.generator = GGUFGenerator(
            model_path=gguf_model_path,
            n_gpu_layers=gguf_n_gpu_layers,
            n_ctx=gguf_n_ctx,
            n_threads=gguf_n_threads,
            config=self.config,
            max_new_tokens=gguf_max_new_tokens,
            temperature=gguf_temperature,
            top_p=gguf_top_p,
            system_prompt=system_prompt
        )

        # Load the model
        logger.info("Loading GGUF model...")
        self.generator.load_model()

        # No retriever (removed entirely)
        self.retriever = None

        # Router (to handle greeting/thanks)
        self.router = QueryRouter()

        # Chat history
        self.chat_history: List[Dict] = []

        logger.info("โœ… GGUFNoRAGPipeline initialized (RAG removed)")
        logger.info("   - Retriever: โŒ none")
        logger.info("   - Router: โœ… present (for greeting/thanks)")

    def _estimate_usage(self, query: str, answer: str) -> dict:
        """Estimate token usage."""
        prompt_tokens = len(query.split()) * 2
        completion_tokens = len(answer.split()) * 2

        return {
            'total_tokens': prompt_tokens + completion_tokens,
            'prompt_tokens': prompt_tokens,
            'completion_tokens': completion_tokens
        }

    def generate_answer(
        self,
        query: str,
        top_k: int = None,
        search_mode: str = None,
        alpha: float = None
    ) -> dict:
        """
        Generate an answer (no RAG).

        Args:
            query: Question
            top_k: Unused (compatibility only)
            search_mode: Unused (compatibility only)
            alpha: Unused (compatibility only)

        Returns:
            dict: answer, sources, search_mode, usage, elapsed_time, used_retrieval
        """
        try:
            start_time = time.time()

            # Classify the query with the router
            classification = self.router.classify(query)
            query_type = classification['type']

            logger.info(f"๐Ÿ“ Classified as: {query_type} (confidence: {classification['confidence']:.2f})")

            # Dynamic prompt selection
            if query_type in ['greeting', 'thanks', 'out_of_scope']:
                system_prompt = PromptManager.get_prompt(query_type, model_type="gguf")
            else:
                system_prompt = PromptManager.get_prompt('document', model_type="gguf")

            # Always generate without RAG (context=None)
            answer = self.generator.chat(
                question=query,
                context=None,  # no context
                system_prompt=system_prompt
            )

            elapsed_time = time.time() - start_time

            # Append to the chat history
            self.chat_history.append({"role": "user", "content": query})
            self.chat_history.append({"role": "assistant", "content": answer})

            # Return results
            return {
                'answer': answer,
                'sources': [],            # no sources
                'used_retrieval': False,  # no retrieval performed
                'query_type': query_type,
                'search_mode': 'none',    # no search mode
                'routing_info': classification,
                'elapsed_time': elapsed_time,
                'usage': self._estimate_usage(query, answer)
            }

        except Exception as e:
            logger.error(f"โŒ Answer generation failed: {e}")
            import traceback
            traceback.print_exc()
            raise RuntimeError(f"Answer generation failed: {str(e)}") from e

    def chat(self, query: str) -> str:
        """Simple chat interface."""
        result = self.generate_answer(query)
        return result['answer']

    def clear_history(self):
        """Reset the chat history."""
        self.chat_history = []
        logger.info("๐Ÿ—‘๏ธ Chat history cleared.")

    def get_history(self) -> List[Dict]:
        """Return the chat history."""
        return self.chat_history.copy()
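
With all three pipeline variants in place, here is a sketch of the ablation this commit sets up (editor's sketch, not part of the commit: module paths are inferred from the file names above, the queries and CPU-only setting are hypothetical, and running all three sequentially needs enough RAM for repeated 8B model loads):

from src.generator.generator_gguf import GGUFRAGPipeline            # QLoRA-tuned model + RAG
from src.generator.generator_gguf_base import GGUFBaseRAGPipeline   # base model + RAG
from src.generator.generator_gguf_no_rag import GGUFNoRAGPipeline   # QLoRA-tuned model, no RAG

query = "์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?"

for name, cls in [("rag", GGUFRAGPipeline),
                  ("base_rag", GGUFBaseRAGPipeline),
                  ("no_rag", GGUFNoRAGPipeline)]:
    pipe = cls(n_gpu_layers=0)  # CPU-only so the runs stay comparable
    result = pipe.generate_answer(query)
    print(f"[{name}] retrieval={result['used_retrieval']} "
          f"time={result['elapsed_time']:.1f}s")
    print(result['answer'], "\n")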
src/retriever/main.py ADDED
@@ -0,0 +1,67 @@
import os
from RAG_pipeline_v1.rag_config import RAGConfig
from RAG_pipeline_v1.rag_data_processing import RAGVectorDBPipeline
from RAG_pipeline_v1.rag_pipeline import RAGPipeline
from RAG_pipeline_v1.rag_evaluator import RAGEvaluator


def main():
    """Main entry point."""

    # ===== Environment setup =====
    print("="*60)
    print("Initializing the RAG system")
    print("="*60)

    os.environ["OPENAI_API_KEY"] = RAGConfig.OPENAI_API_KEY

    config = RAGConfig()
    config.validate()
    print(config)

    # ===== 1. Build the vector DB (first run only) =====
    # Uncomment to run
    # print("\n" + "="*60)
    # print("Building the vector DB")
    # print("="*60)
    # db_pipeline = RAGVectorDBPipeline(config)
    # vectorstore = db_pipeline.build()
    # db_pipeline.test_search()

    # ===== 2. Initialize the RAG pipeline =====
    print("\n" + "="*60)
    print("Initializing the RAG pipeline")
    print("="*60)

    rag = RAGPipeline(config=config)

    # ===== 3. Test queries (Korean, matching the indexed documents) =====
    print("\n" + "="*60)
    print("Test queries")
    print("="*60)

    test_queries = [
        "ํ•œ์˜๋Œ€ํ•™๊ต์˜ ํŠน์„ฑํ™” ๊ต์œกํ™˜๊ฒฝ ๊ตฌ์ถ• ์‚ฌ์—…์€ ๋ฌด์—‡์ธ๊ฐ€์š”?",
        "์žฌ๋‚œ ์•ˆ์ „ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ๊ตฌ์ถ• ์‚ฌ์—…์€ ์–ด๋–ค ๊ฒƒ์ด ์žˆ๋‚˜์š”?",
    ]

    for query in test_queries:
        result = rag.generate_answer(query)
        rag.print_result(result)
        print("\n")

    # ===== 4. Evaluation =====
    print("\n" + "="*60)
    print("System evaluation")
    print("="*60)

    evaluator = RAGEvaluator(rag)
    eval_results = evaluator.evaluate()

    print("\n" + "="*60)
    print("โœ… All tasks complete")
    print("="*60)


if __name__ == "__main__":
    main()
src/retriever/retriever.py ADDED
@@ -0,0 +1,313 @@
from langchain_chroma import Chroma
from langchain_openai.embeddings import OpenAIEmbeddings
from langsmith import traceable
import time
import os
from rank_bm25 import BM25Okapi
import numpy as np
from sentence_transformers import CrossEncoder

from src.utils.config import RAGConfig


class RAGRetriever:
    """RAG retrieval system (hybrid search + re-ranker)."""

    def __init__(self, config: RAGConfig = None):
        self.config = config or RAGConfig()
        self.vectorstore = None
        self.embeddings = None

        self._initialize_embeddings()
        self._create_vectorstore()
        self._initialize_bm25()
        self._initialize_reranker()

    def _initialize_embeddings(self):
        """Initialize the embedding model."""
        os.environ["OPENAI_API_KEY"] = self.config.OPENAI_API_KEY

        self.embeddings = OpenAIEmbeddings(
            model=self.config.EMBEDDING_MODEL_NAME
        )

    def _create_vectorstore(self):
        """Load the existing vector store."""
        self.vectorstore = Chroma(
            embedding_function=self.embeddings,
            persist_directory=self.config.DB_DIRECTORY,
            collection_name=self.config.COLLECTION_NAME
        )

    def _initialize_bm25(self):
        """Build the BM25 index."""
        all_docs = self.vectorstore.get()

        self.doc_texts = all_docs['documents']
        self.doc_ids = all_docs['ids']
        self.doc_metadatas = all_docs['metadatas']

        self.content_to_id = {text: doc_id for text, doc_id in zip(self.doc_texts, self.doc_ids)}

        tokenized_docs = [doc.split() for doc in self.doc_texts]
        self.bm25 = BM25Okapi(tokenized_docs)

        print(f"โœ… BM25 index built: {len(self.doc_texts)} documents")

    def _initialize_reranker(self):
        """Initialize the re-ranker."""
        self.reranker = CrossEncoder('BAAI/bge-reranker-base')
        print("โœ… Re-ranker initialized (bge-reranker-base)")

    @staticmethod
    def _min_max_normalize(scores):
        """Normalize scores to the 0-1 range."""
        scores = np.array(scores)
        min_score = scores.min()
        max_score = scores.max()

        if max_score == min_score:
            return np.full_like(scores, 0.5, dtype=float)

        return (scores - min_score) / (max_score - min_score)
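
    # Worked example (editor's illustration): _min_max_normalize([2.0, 5.0, 8.0])
    # gives (x - 2) / (8 - 2) -> [0.0, 0.5, 1.0]; a constant score vector maps
    # to all 0.5s instead of dividing by zero. Normalizing both score sources
    # to [0, 1] is what makes the weighted sum in hybrid_search meaningful.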
73
+
74
+ def _find_doc_id_by_content(self, content):
75
+ """๋ฌธ์„œ content๋กœ ID ์ฐพ๊ธฐ"""
76
+ return self.content_to_id.get(content, None)
77
+
78
+ def _rerank(self, query, documents, top_k):
79
+ """
80
+ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์žฌ์ •๋ ฌ
81
+
82
+ Args:
83
+ query: ๊ฒ€์ƒ‰ ์ฟผ๋ฆฌ
84
+ documents: hybrid_search ๊ฒฐ๊ณผ ๋ฆฌ์ŠคํŠธ
85
+ top_k: ์ตœ์ข… ๋ฐ˜ํ™˜ํ•  ๋ฌธ์„œ ์ˆ˜
86
+
87
+ Returns:
88
+ ์žฌ์ •๋ ฌ๋œ ์ƒ์œ„ k๊ฐœ ๋ฌธ์„œ
89
+ """
90
+ if len(documents) == 0:
91
+ return []
92
+
93
+ # 1. (query, document) ์Œ ์ƒ์„ฑ
94
+ pairs = [[query, doc['content']] for doc in documents]
95
+
96
+ # 2. CrossEncoder๋กœ ์ ์ˆ˜ ๊ณ„์‚ฐ
97
+ scores = self.reranker.predict(pairs)
98
+
99
+ # 3. ์ ์ˆ˜๋ฅผ ๋ฌธ์„œ์— ์ถ”๊ฐ€
100
+ for i, doc in enumerate(documents):
101
+ doc['rerank_score'] = float(scores[i])
102
+
103
+ # 4. ์ •๋ ฌ ๋ฐ ๋ฐ˜ํ™˜
104
+ sorted_docs = sorted(documents,
105
+ key=lambda x: x['rerank_score'],
106
+ reverse=True)
107
+
108
+ return sorted_docs[:top_k]
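
    # Illustration (editor's note, hypothetical strings): for passages d1, d2
    # the re-ranker scores each (query, passage) pair jointly, e.g.
    #     self.reranker.predict([["์‚ฌ์—… ๊ธฐ๊ฐ„์€?", d1], ["์‚ฌ์—… ๊ธฐ๊ฐ„์€?", d2]])
    # A cross-encoder reads query and passage together, so it is more accurate
    # but slower than first-stage retrieval; that is why _rerank only re-scores
    # the small candidate set handed to it rather than the whole corpus.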

    @traceable(
        name="RAG_Hybrid_Search",
        metadata={"component": "retriever", "version": "2.0"}
    )
    def hybrid_search(self, query, top_k=None, alpha=0.5):
        """
        Hybrid search: BM25 + embeddings combined.

        Args:
            query: Search query
            top_k: Number of documents to return
            alpha: Embedding weight (0-1)
        """
        start_time = time.time()

        if top_k is None:
            top_k = self.config.DEFAULT_TOP_K

        # 1. BM25 search
        tokenized_query = query.split()
        bm25_scores = self.bm25.get_scores(tokenized_query)
        bm25_normalized = self._min_max_normalize(bm25_scores)

        # 2. Embedding search
        embedding_results = self.vectorstore.similarity_search_with_score(
            query, k=min(top_k * 3, len(self.doc_texts))
        )

        # 3. Normalize the embedding scores
        embedding_scores_raw = {}
        for doc, distance in embedding_results:
            doc_id = self._find_doc_id_by_content(doc.page_content)
            if doc_id:
                embedding_scores_raw[doc_id] = 1 / (1 + distance)

        if embedding_scores_raw:
            embed_values = np.array(list(embedding_scores_raw.values()))
            embed_normalized = self._min_max_normalize(embed_values)
            embedding_scores = dict(zip(embedding_scores_raw.keys(), embed_normalized))
        else:
            embedding_scores = {}

        # 4. Compute the hybrid scores
        hybrid_scores = {}
        for i, doc_id in enumerate(self.doc_ids):
            bm25_score = bm25_normalized[i]
            embed_score = embedding_scores.get(doc_id, 0)
            hybrid_scores[doc_id] = (1 - alpha) * bm25_score + alpha * embed_score

        # 5. Sort and take the top k
        sorted_ids = sorted(hybrid_scores.keys(),
                            key=lambda x: hybrid_scores[x],
                            reverse=True)
        top_ids = sorted_ids[:top_k]

        # 6. Format the results
        # ('ํŒŒ์ผ๋ช…' = filename, '๋ฐœ์ฃผ ๊ธฐ๊ด€' = ordering organization: Korean metadata keys in the DB)
        formatted_results = []
        for doc_id in top_ids:
            idx = self.doc_ids.index(doc_id)
            formatted_results.append({
                'content': self.doc_texts[idx],
                'metadata': self.doc_metadatas[idx],
                'hybrid_score': hybrid_scores[doc_id],
                'bm25_score': float(bm25_normalized[idx]),
                'embed_score': embedding_scores.get(doc_id, 0),
                'filename': self.doc_metadatas[idx].get('ํŒŒ์ผ๋ช…', 'N/A'),
                'organization': self.doc_metadatas[idx].get('๋ฐœ์ฃผ ๊ธฐ๊ด€', 'N/A')
            })

        end_time = time.time()
        print(f"๐Ÿ” Hybrid search done: {len(formatted_results)} results (alpha={alpha}, {end_time-start_time:.3f}s)")
        return formatted_results
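
    # Worked example (editor's illustration, hypothetical numbers): with
    # alpha=0.5, a document whose normalized BM25 score is 0.8 and normalized
    # embedding score is 0.4 gets
    #     hybrid = (1 - 0.5) * 0.8 + 0.5 * 0.4 = 0.6
    # alpha=0.0 reduces to pure BM25 and alpha=1.0 to pure embedding search,
    # which is exactly how search_with_mode() implements its "bm25" mode below.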
182
+
+     @traceable(
+         name="RAG_Hybrid_Search_Rerank",
+         metadata={"component": "retriever", "version": "3.0"}
+     )
+     def hybrid_search_with_rerank(self, query, top_k=None, alpha=0.5, rerank_candidates=None):
+         """
+         Hybrid search + re-ranking.
+
+         Args:
+             query: search query
+             top_k: number of documents to return in the end
+             alpha: BM25/embedding weight
+             rerank_candidates: number of candidates to re-rank (defaults to top_k * 3)
+         """
+         start_time = time.time()
+
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if rerank_candidates is None:
+             rerank_candidates = top_k * 3
+
+         # 1. Fetch candidate documents with hybrid search
+         candidates = self.hybrid_search(query, top_k=rerank_candidates, alpha=alpha)
+
+         # 2. Re-rank the candidates
+         if len(candidates) > 0:
+             results = self._rerank(query, candidates, top_k)
+         else:
+             results = []
+
+         end_time = time.time()
+         print(f"🔄 Re-ranking done: {len(candidates)} → {len(results)} docs ({end_time-start_time:.3f}s)")
+
+         return results
+
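Retrieve-then-rerank trades latency for precision: the hybrid stage over-fetches (3x top_k by default) and the CrossEncoder keeps only the best. A hedged usage sketch, assuming an already-constructed retriever instance; the query string and pool size are illustrative:

docs = retriever.hybrid_search_with_rerank(
    query="data center construction RFP",  # illustrative query
    top_k=5,               # documents returned to the caller
    alpha=0.5,             # even BM25/embedding blend
    rerank_candidates=20,  # widen the pool the CrossEncoder scores
)
for d in docs:
    print(f"{d['rerank_score']:.3f}", d['filename'])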
+     def search_with_mode(self, query, top_k=None, mode="hybrid_rerank", alpha=0.5):
+         """Dispatch to a search strategy by mode name."""
+         if mode == "embedding":
+             return self.search(query, top_k)
+         elif mode == "bm25":
+             return self.hybrid_search(query, top_k, alpha=0.0)
+         elif mode == "hybrid":
+             return self.hybrid_search(query, top_k, alpha=alpha)
+         elif mode == "hybrid_rerank":
+             return self.hybrid_search_with_rerank(query, top_k, alpha)
+         else:
+             raise ValueError(f"Unknown mode: {mode}")
+
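Reading the dispatch table: alpha=0.0 zeroes out the embedding term in the hybrid formula, which is why the "bm25" branch can reuse hybrid_search. Illustrative calls (a constructed retriever instance is assumed):

q = "data center construction RFP"
retriever.search_with_mode(q, mode="embedding")          # dense vectors only
retriever.search_with_mode(q, mode="bm25")               # alpha=0 -> lexical only
retriever.search_with_mode(q, mode="hybrid", alpha=0.7)  # lean toward embeddings
retriever.search_with_mode(q, mode="hybrid_rerank")      # default two-stage pipeline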
+     @traceable(
+         name="RAG_Retriever_Search",
+         metadata={"component": "retriever", "version": "1.0"}
+     )
+     def search(self, query: str, top_k: int | None = None, filter_metadata: dict | None = None):
+         """
+         Similar-document search (embedding-based).
+         """
+         start_time = time.time()
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if filter_metadata:
+             results = self.vectorstore.similarity_search_with_score(
+                 query, k=top_k, filter=filter_metadata
+             )
+         else:
+             results = self.vectorstore.similarity_search_with_score(
+                 query, k=top_k
+             )
+
+         formatted_results = []
+         for doc, score in results:
+             formatted_results.append({
+                 'content': doc.page_content,
+                 'metadata': doc.metadata,
+                 'distance': score,
+                 # Distance -> bounded similarity, consistent with hybrid_search
+                 'relevance_score': 1 / (1 + score),
+                 'filename': doc.metadata.get('파일명', 'N/A'),
+                 'organization': doc.metadata.get('발주 기관', 'N/A')
+             })
+
+         end_time = time.time()
+         print(f"🔍 Search done: {len(results)} docs ({end_time-start_time:.3f}s)")
+         return formatted_results
+
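On the score conversion: Chroma's similarity_search_with_score returns a distance (lower is better), so relevance_score maps it into a bounded similarity via 1 / (1 + distance), the same conversion hybrid_search applies before normalizing. A quick check of the mapping:

for d in (0.0, 0.5, 2.0):
    print(d, 1 / (1 + d))  # 1.0, 0.667, 0.333: stays in (0, 1], monotonically decreasing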
+     def search_with_rerank(self, query, top_k=None, rerank_candidates=None):
+         """
+         Embedding search + re-ranking.
+
+         Args:
+             query: search query
+             top_k: number of documents to return in the end
+             rerank_candidates: number of candidates to re-rank
+
+         Returns:
+             Re-ranked document list
+         """
+         start_time = time.time()
+
+         if top_k is None:
+             top_k = self.config.DEFAULT_TOP_K
+
+         if rerank_candidates is None:
+             rerank_candidates = top_k * 3
+
+         # 1. Fetch candidates with embedding search
+         candidates = self.search(query, top_k=rerank_candidates)
+
+         # 2. Re-rank the candidates
+         if len(candidates) > 0:
+             results = self._rerank(query, candidates, top_k)
+         else:
+             results = []
+
+         end_time = time.time()
+         print(f"🔄 Embedding + re-ranking done: {len(candidates)} → {len(results)} docs ({end_time-start_time:.3f}s)")
+
+         return results
+
+     def search_by_organization(self, query: str, organization: str, top_k: int | None = None):
+         """Search restricted to a single issuing organization."""
+         return self.search(
+             query, top_k=top_k, filter_metadata={'발주 기관': organization}
+         )
+
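The organization filter is a plain Chroma metadata equality match on a key stored at indexing time, so the same mechanism extends to any other metadata field. A hedged usage sketch (the organization value is a placeholder):

# Equivalent to: retriever.search(query, filter_metadata={'발주 기관': organization})
docs = retriever.search_by_organization(
    "system maintenance RFP",
    organization="National IT Agency",  # placeholder value
)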
+     def get_retriever(self):
+         """Return a Retriever for use in LangChain chains."""
+         return self.vectorstore.as_retriever(
+             search_type="similarity",
+             search_kwargs={"k": self.config.DEFAULT_TOP_K}
+         )
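A sketch of consuming the LangChain retriever returned above; retrievers are Runnables in recent langchain-core, so invoke() performs the search (the query string is illustrative, not from this repo):

lc_retriever = retriever.get_retriever()
docs = lc_retriever.invoke("data center construction RFP")  # list[Document]
for doc in docs:
    print(doc.metadata.get('파일명', 'N/A'))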
src/utils/config.py ADDED
@@ -0,0 +1,193 @@
+ import os
+ from dotenv import load_dotenv
+
+
+ class Config:
+     """Unified configuration class for the RAG system."""
+
+     def __init__(self):
+         # Load the .env file
+         load_dotenv()
+
+         # ===== API keys =====
+         self.OPENAI_API_KEY = self._get_api_key()
+
+         # ===== Paths =====
+         # Preprocessing
+         self.META_CSV_PATH = "./data/data_list.csv"
+         self.BASE_FOLDER_PATH = "./data/files/"
+         self.OUTPUT_CHUNKS_PATH = "./data/rag_chunks_final.csv"
+
+         # RAG - environment variable first, default otherwise
+         self.RAG_INPUT_PATH = "./data/rag_chunks_final.csv"
+         self.DB_DIRECTORY = os.getenv("CHROMA_DB_PATH", "./chroma_db")
+
+         # ===== Preprocessing settings =====
+         self.CHUNK_SIZE = 1000
+         self.CHUNK_OVERLAP = 200
+         self.SEPARATORS = ["\n\n", "\n", " ", ""]
+         self.MIN_TEXT_LENGTH = 100
+
+         # ===== Embedding settings =====
+         self.EMBEDDING_MODEL_NAME = "text-embedding-3-small"
+         self.BATCH_SIZE = 50
+         self.MAX_TOKENS_PER_BATCH = 250000
+
+         # Chunk validation bounds
+         self.MIN_CHUNK_LENGTH = 10
+         self.MAX_CHUNK_LENGTH = 10000
+
+         # ===== Vector DB settings =====
+         self.COLLECTION_NAME = "rag_documents"
+
+         # ===== Search settings =====
+         self.DEFAULT_TOP_K = 10
+         self.DEFAULT_ALPHA = 0.5
+         self.DEFAULT_SEARCH_MODE = "hybrid_rerank"
+
+         # ===== LLM settings =====
+         self.LLM_MODEL_NAME = "gpt-4o-mini"
+         self.DEFAULT_TEMPERATURE = 0.0
+         self.DEFAULT_MAX_TOKENS = 1000
+
+         # System prompt ("You are an expert in analyzing and summarizing RFPs.")
+         self.SYSTEM_PROMPT = "당신은 RFP(제안요청서) 분석 및 요약 전문가입니다."
+
+         # ===== GGUF local model settings =====
+         # Whether to use the Model Hub (environment variable first)
+         self.USE_MODEL_HUB = os.getenv("USE_MODEL_HUB", "true").lower() == "true"
+
+         # Hugging Face Model Hub settings
+         # 1. QLoRA model (fine-tuned) - for the existing service
+         self.MODEL_HUB_REPO = os.getenv(
+             "MODEL_HUB_REPO",
+             "Dongjin1203/RFP_Documents_chatbot"
+         )
+         self.MODEL_HUB_FILENAME = os.getenv(
+             "MODEL_HUB_FILENAME",
+             "Llama-3-Open-Ko-8B.Q4_K_M.gguf"
+         )
+
+         # 2. Base model (no PEFT) - for comparison experiments
+         self.BASE_MODEL_HUB_REPO = os.getenv(
+             "BASE_MODEL_HUB_REPO",
+             "beomi/Llama-3-Open-Ko-8B-gguf"
+         )
+         self.BASE_MODEL_HUB_FILENAME = os.getenv(
+             "BASE_MODEL_HUB_FILENAME",
+             "ggml-model-Q4_K_M.gguf"
+         )
+
+         # Shared cache directory
+         self.MODEL_CACHE_DIR = os.getenv("MODEL_CACHE_DIR", ".cache/models")
+
+         # Local path (used when USE_MODEL_HUB=false)
+         self.GGUF_MODEL_PATH = os.getenv("GGUF_MODEL_PATH", ".cache/models/Llama-3-Open-Ko-8B.Q4_K_M.gguf")
+
+         # GGUF GPU settings (tuned for T4 Medium with an 8B model)
+         self.GGUF_N_GPU_LAYERS = int(os.getenv("GGUF_N_GPU_LAYERS", "35"))  # load the full 8B model onto the T4 GPU
+         self.GGUF_N_CTX = int(os.getenv("GGUF_N_CTX", "2048"))  # context length
+         self.GGUF_N_THREADS = int(os.getenv("GGUF_N_THREADS", "4"))  # CPU threads (keep low when the GPU is used)
+         self.GGUF_MAX_NEW_TOKENS = int(os.getenv("GGUF_MAX_NEW_TOKENS", "512"))  # max generated tokens
+         self.GGUF_TEMPERATURE = float(os.getenv("GGUF_TEMPERATURE", "0.7"))  # generation diversity
+         self.GGUF_TOP_P = float(os.getenv("GGUF_TOP_P", "0.9"))  # nucleus sampling
+
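Every tunable above consults the environment first, so a single .env file drives the whole stack. A minimal sketch with placeholder values; only OPENAI_API_KEY is required, everything else falls back to the defaults shown in the code:

OPENAI_API_KEY=sk-...            # required; placeholder shown
CHROMA_DB_PATH=./chroma_db
USE_MODEL_HUB=true
MODEL_HUB_REPO=Dongjin1203/RFP_Documents_chatbot
MODEL_HUB_FILENAME=Llama-3-Open-Ko-8B.Q4_K_M.gguf
MODEL_CACHE_DIR=.cache/models
GGUF_N_GPU_LAYERS=35
GGUF_N_CTX=2048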
+     def _get_api_key(self) -> str:
+         """Load the API key from the environment."""
+         api_key = os.getenv("OPENAI_API_KEY")
+
+         if not api_key:
+             raise ValueError(
+                 "OPENAI_API_KEY is not set.\n"
+                 "Create a .env file in the project root and add OPENAI_API_KEY=your-key."
+             )
+
+         return api_key
+
+     def validate_preprocess(self):
+         """Validate the preprocessing settings."""
+         if not os.path.exists(self.META_CSV_PATH):
+             raise FileNotFoundError(
+                 f"Meta CSV file not found: {self.META_CSV_PATH}"
+             )
+
+         if not os.path.exists(self.BASE_FOLDER_PATH):
+             raise FileNotFoundError(
+                 f"Files folder not found: {self.BASE_FOLDER_PATH}"
+             )
+
+         output_dir = os.path.dirname(self.OUTPUT_CHUNKS_PATH)
+         if output_dir:
+             os.makedirs(output_dir, exist_ok=True)
+
+         return True
+
+     def validate_rag(self):
+         """Validate the RAG settings."""
+         if not self.OPENAI_API_KEY:
+             raise ValueError("OPENAI_API_KEY is not set")
+
+         return True
+
+     def validate_gguf(self):
+         """Validate the GGUF settings."""
+         if not self.USE_MODEL_HUB:
+             # When a local file is used, check that the path exists
+             if not os.path.exists(self.GGUF_MODEL_PATH):
+                 print(f"⚠️ Warning: GGUF model file not found: {self.GGUF_MODEL_PATH}")
+                 print("   Set USE_MODEL_HUB=true to download it automatically, or provide the model file.")
+
+         # Check the GPU layer setting
+         if self.GGUF_N_GPU_LAYERS > 0:
+             print(f"✅ GPU acceleration enabled: {self.GGUF_N_GPU_LAYERS} layers")
+         else:
+             print("⚠️ CPU-only mode (n_gpu_layers=0)")
+
+         return True
+
+     def validate_all(self):
+         """Validate all settings."""
+         self.validate_preprocess()
+         self.validate_rag()
+         self.validate_gguf()
+         return True
+
+     def validate(self):
+         """Validate settings (kept for backwards compatibility)."""
+         return self.validate_preprocess()
+
+     def print_gguf_config(self):
+         """Print the GGUF settings (for debugging)."""
+         print("\n" + "="*50)
+         print("GGUF model settings")
+         print("="*50)
+         print(f"Using Model Hub: {self.USE_MODEL_HUB}")
+
+         if self.USE_MODEL_HUB:
+             print("\n[QLoRA model]")
+             print(f"  Hub repo: {self.MODEL_HUB_REPO}")
+             print(f"  Hub filename: {self.MODEL_HUB_FILENAME}")
+
+             print("\n[Base model]")
+             print(f"  Hub repo: {self.BASE_MODEL_HUB_REPO}")
+             print(f"  Hub filename: {self.BASE_MODEL_HUB_FILENAME}")
+
+             print("\n[Shared]")
+             print(f"  Cache directory: {self.MODEL_CACHE_DIR}")
+         else:
+             print(f"Local path: {self.GGUF_MODEL_PATH}")
+         print("\nGPU settings:")
+         print(f"  - GPU layers: {self.GGUF_N_GPU_LAYERS}")
+         print(f"  - Context: {self.GGUF_N_CTX}")
+         print(f"  - Threads: {self.GGUF_N_THREADS}")
+         print("\nGeneration settings:")
+         print(f"  - Max tokens: {self.GGUF_MAX_NEW_TOKENS}")
+         print(f"  - Temperature: {self.GGUF_TEMPERATURE}")
+         print(f"  - Top-P: {self.GGUF_TOP_P}")
+         print("="*50 + "\n")
+
+
+ # Aliases for backwards compatibility
+ PreprocessConfig = Config
+ RAGConfig = Config
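Typical use, based only on the methods defined above:

config = Config()           # loads .env; raises ValueError if OPENAI_API_KEY is missing
config.validate_all()       # checks data paths, the API key, and the GGUF settings
config.print_gguf_config()  # dump the effective GGUF configuration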