gbrabbit committed on
Commit
130525d
ยท
1 Parent(s): b386020

Auto commit at 2025-08-19 20:43:11

Browse files
0.41.0 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: peft in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.8.0)
2
+ Requirement already satisfied: bitsandbytes in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.46.1)
3
+ Requirement already satisfied: numpy>=1.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.2.6)
4
+ Requirement already satisfied: packaging>=20.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (25.0)
5
+ Requirement already satisfied: psutil in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (7.0.0)
6
+ Requirement already satisfied: pyyaml in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (6.0.2)
7
+ Requirement already satisfied: torch>=1.13.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.7.1)
8
+ Requirement already satisfied: transformers in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.54.1)
9
+ Requirement already satisfied: tqdm in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.67.1)
10
+ Requirement already satisfied: accelerate>=0.21.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (1.9.0)
11
+ Requirement already satisfied: safetensors in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.5.3)
12
+ Requirement already satisfied: huggingface-hub>=0.17.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.34.3)
13
+ Requirement already satisfied: filelock in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.18.0)
14
+ Requirement already satisfied: typing-extensions>=4.10.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (4.14.1)
15
+ Requirement already satisfied: sympy>=1.13.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (1.14.0)
16
+ Requirement already satisfied: networkx in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.5)
17
+ Requirement already satisfied: jinja2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.1.6)
18
+ Requirement already satisfied: fsspec in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (2025.7.0)
19
+ Requirement already satisfied: requests in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2.32.4)
20
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from sympy>=1.13.3->torch>=1.13.0->peft) (1.3.0)
21
+ Requirement already satisfied: colorama in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from tqdm->peft) (0.4.6)
22
+ Requirement already satisfied: MarkupSafe>=2.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)
23
+ Requirement already satisfied: charset_normalizer<4,>=2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.4.2)
24
+ Requirement already satisfied: idna<4,>=2.5 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.10)
25
+ Requirement already satisfied: urllib3<3,>=1.21.1 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2.5.0)
26
+ Requirement already satisfied: certifi>=2017.4.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2025.8.3)
27
+ Requirement already satisfied: regex!=2019.12.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (2025.7.34)
28
+ Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (0.21.4)
0.7.0 ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Requirement already satisfied: peft in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (0.8.0)
2
+ Requirement already satisfied: numpy>=1.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.2.6)
3
+ Requirement already satisfied: packaging>=20.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (25.0)
4
+ Requirement already satisfied: psutil in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (7.0.0)
5
+ Requirement already satisfied: pyyaml in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (6.0.2)
6
+ Requirement already satisfied: torch>=1.13.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (2.7.1)
7
+ Requirement already satisfied: transformers in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.54.1)
8
+ Requirement already satisfied: tqdm in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (4.67.1)
9
+ Requirement already satisfied: accelerate>=0.21.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (1.9.0)
10
+ Requirement already satisfied: safetensors in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.5.3)
11
+ Requirement already satisfied: huggingface-hub>=0.17.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from peft) (0.34.3)
12
+ Requirement already satisfied: filelock in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (3.18.0)
13
+ Requirement already satisfied: fsspec>=2023.5.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2025.7.0)
14
+ Requirement already satisfied: requests in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (2.32.4)
15
+ Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from huggingface-hub>=0.17.0->peft) (4.14.1)
16
+ Requirement already satisfied: sympy>=1.13.3 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (1.14.0)
17
+ Requirement already satisfied: networkx in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.5)
18
+ Requirement already satisfied: jinja2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from torch>=1.13.0->peft) (3.1.6)
19
+ Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from sympy>=1.13.3->torch>=1.13.0->peft) (1.3.0)
20
+ Requirement already satisfied: colorama in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from tqdm->peft) (0.4.6)
21
+ Requirement already satisfied: MarkupSafe>=2.0 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from jinja2->torch>=1.13.0->peft) (3.0.2)
22
+ Requirement already satisfied: charset_normalizer<4,>=2 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.4.2)
23
+ Requirement already satisfied: idna<4,>=2.5 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (3.10)
24
+ Requirement already satisfied: urllib3<3,>=1.21.1 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2.5.0)
25
+ Requirement already satisfied: certifi>=2017.4.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from requests->huggingface-hub>=0.17.0->peft) (2025.8.3)
26
+ Requirement already satisfied: regex!=2019.12.17 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (2025.7.34)
27
+ Requirement already satisfied: tokenizers<0.22,>=0.21 in c:\project\lily_generate_project\lily_generate_package\lily_llm_env\lib\site-packages (from transformers->peft) (0.21.4)
README_CONTEXT_LORA.md ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Lily LLM - ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA/QLoRA ์‹œ์Šคํ…œ
2
+
3
+ ## ๐Ÿ“‹ ๊ฐœ์š”
4
+
5
+ Lily LLM ํ”„๋กœ์ ํŠธ์— ๋‹จ๊ธฐ ๊ธฐ์–ต(์ปจํ…์ŠคํŠธ ์ฐฝ) ๊ธฐ๋Šฅ๊ณผ LoRA/QLoRA ์ง€์›์„ ์ถ”๊ฐ€ํ•˜์—ฌ ๋”์šฑ ๊ฐ•๋ ฅํ•˜๊ณ  ํšจ์œจ์ ์ธ AI ๋Œ€ํ™” ์‹œ์Šคํ…œ์„ ๊ตฌ์ถ•ํ–ˆ์Šต๋‹ˆ๋‹ค.
6
+
7
+ ## ๐Ÿš€ ์ฃผ์š” ๊ธฐ๋Šฅ
8
+
9
+ ### 1. ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ (Context Management)
10
+
11
+ #### ๐Ÿ”ง ํ•ต์‹ฌ ๊ธฐ๋Šฅ
12
+ - **๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ๊ด€๋ฆฌ**: ์‚ฌ์šฉ์ž์™€ AI ๊ฐ„์˜ ๋Œ€ํ™”๋ฅผ ์ˆœ์ฐจ์ ์œผ๋กœ ์ €์žฅ
13
+ - **๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”**: ์„ค์ •๋œ ์ œํ•œ์— ๋„๋‹ฌํ•˜๋ฉด ์ž๋™์œผ๋กœ ์ปจํ…์ŠคํŠธ ์••์ถ•
14
+ - **์„ธ์…˜ ๊ด€๋ฆฌ**: ์—ฌ๋Ÿฌ ๋Œ€ํ™” ์„ธ์…˜์„ ๋…๋ฆฝ์ ์œผ๋กœ ๊ด€๋ฆฌ
15
+ - **์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰**: ์ €์žฅ๋œ ๋Œ€ํ™” ๋‚ด์šฉ์—์„œ ํŠน์ • ์ •๋ณด ๊ฒ€์ƒ‰
16
+
17
+ #### ๐Ÿ“Š ์ปจํ…์ŠคํŠธ ์ „๋žต
18
+ - **Sliding Window**: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„  ์œ ์ง€
19
+ - **Priority Keep**: ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
20
+ - **Circular Buffer**: ์ˆœํ™˜ ๋ฐฉ์‹์œผ๋กœ ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
21
+
22
+ #### ๐Ÿ’พ ๋ฐ์ดํ„ฐ ๊ด€๋ฆฌ
23
+ - **๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ**: JSON ํ˜•์‹์œผ๋กœ ์ปจํ…์ŠคํŠธ ์ €์žฅ ๋ฐ ๋ณต์›
24
+ - **๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ง€์›**: ๊ฐ ๋ฉ”์‹œ์ง€์— ์ถ”๊ฐ€ ์ •๋ณด ์ฒจ๋ถ€ ๊ฐ€๋Šฅ
25
+ - **ํ†ต๊ณ„ ์ •๋ณด**: ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋ฐ ํšจ์œจ์„ฑ ์ง€ํ‘œ ์ œ๊ณต
26
+
27
+ ### 2. LoRA/QLoRA ์ง€์› ์‹œ์Šคํ…œ
28
+
29
+ #### ๐Ÿ”— LoRA (Low-Rank Adaptation)
30
+ - **ํšจ์œจ์ ์ธ ํŒŒ์ธํŠœ๋‹**: ์ „์ฒด ๋ชจ๋ธ ๋Œ€์‹  ์ผ๋ถ€ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ํ›ˆ๋ จ
31
+ - **๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ**: GPU ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๋Œ€ํญ ๊ฐ์†Œ
32
+ - **๋น ๋ฅธ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜**: ์—ฌ๋Ÿฌ ์ž‘์—…๋ณ„ ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋น ๋ฅด๊ฒŒ ๊ต์ฒด
33
+
34
+ #### ๐Ÿ“ˆ QLoRA (Quantized LoRA)
35
+ - **4๋น„ํŠธ ์–‘์žํ™”**: ๋ชจ๋ธ ํฌ๊ธฐ์™€ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ถ”๊ฐ€ ๊ฐ์†Œ
36
+ - **๊ณ ํ’ˆ์งˆ ํ›ˆ๋ จ**: ์–‘์žํ™”๋œ ๋ชจ๋ธ์—์„œ๋„ ๋†’์€ ํ’ˆ์งˆ์˜ ํ›ˆ๋ จ ๊ฐ€๋Šฅ
37
+ - **ํ•˜๋“œ์›จ์–ด ํšจ์œจ์„ฑ**: ์ €์‚ฌ์–‘ GPU์—์„œ๋„ ํ›ˆ๋ จ ๊ฐ€๋Šฅ
38
+
39
+ #### ๐ŸŽฏ ์ง€์› ๋ชจ๋ธ
40
+ - **Causal Language Models**: GPT, LLaMA, Kanana ๋“ฑ
41
+ - **Sequence-to-Sequence**: T5, BART ๋“ฑ
42
+ - **Classification Models**: BERT, RoBERTa ๋“ฑ
43
+
44
+ ## ๐Ÿ› ๏ธ ์„ค์น˜ ๋ฐ ์„ค์ •
45
+
46
+ ### 1. ์˜์กด์„ฑ ์„ค์น˜
47
+
48
+ ```bash
49
+ pip install -r requirements.txt
50
+ ```
51
+
52
+ ### 2. ์ถ”๊ฐ€ ํŒจํ‚ค์ง€ ์„ค์น˜
53
+
54
+ ```bash
55
+ # LoRA/QLoRA ์ง€์›
56
+ pip install peft>=0.7.0
57
+ pip install bitsandbytes>=0.41.0
58
+
59
+ # ์„ ํƒ์ : ๋” ๋‚˜์€ ์„ฑ๋Šฅ์„ ์œ„ํ•œ ํŒจํ‚ค์ง€
60
+ pip install accelerate
61
+ pip install transformers[torch]
62
+ ```
63
+
64
+ ### 3. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ •
65
+
66
+ ```bash
67
+ # GPU ์‚ฌ์šฉ ์„ค์ •
68
+ export CUDA_VISIBLE_DEVICES=0
69
+
70
+ # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
71
+ export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
72
+ ```
73
+
74
+ ## ๐Ÿ“– ์‚ฌ์šฉ๋ฒ•
75
+
76
+ ### 1. ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ
77
+
78
+ #### ๊ธฐ๋ณธ ์‚ฌ์šฉ๋ฒ•
79
+
80
+ ```python
81
+ import requests
82
+
83
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
84
+ response = requests.post("http://localhost:8001/context/set-system-prompt",
85
+ data={"prompt": "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."})
86
+
87
+ # ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
88
+ response = requests.post("http://localhost:8001/context/add-message",
89
+ data={
90
+ "role": "user",
91
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”!",
92
+ "metadata": '{"session_id": "session_1"}'
93
+ })
94
+
95
+ # ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€
96
+ response = requests.post("http://localhost:8001/context/add-message",
97
+ data={
98
+ "role": "assistant",
99
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”?",
100
+ "metadata": '{"session_id": "session_1"}'
101
+ })
102
+
103
+ # ์ปจํ…์ŠคํŠธ ์กฐํšŒ
104
+ response = requests.get("http://localhost:8001/context/get")
105
+ context = response.json()["context"]
106
+ ```
107
+
108
+ #### ๊ณ ๊ธ‰ ๊ธฐ๋Šฅ
109
+
110
+ ```python
111
+ # ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
112
+ response = requests.get("http://localhost:8001/context/search?query=๋‚ ์”จ&max_results=5")
113
+
114
+ # ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ
115
+ response = requests.post("http://localhost:8001/context/export",
116
+ data={"file_path": "my_context.json"})
117
+
118
+ # ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
119
+ response = requests.post("http://localhost:8001/context/import",
120
+ data={"file_path": "my_context.json"})
121
+
122
+ # ์ปจํ…์ŠคํŠธ ํ†ต๊ณ„
123
+ response = requests.get("http://localhost:8001/context/summary")
124
+ ```
125
+
126
+ ### 2. LoRA/QLoRA ์‚ฌ์šฉ
127
+
128
+ #### ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ
129
+
130
+ ```python
131
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ
132
+ response = requests.post("http://localhost:8001/lora/load-base-model",
133
+ data={
134
+ "model_path": "/path/to/your/model",
135
+ "model_type": "causal_lm"
136
+ })
137
+ ```
138
+
139
+ #### LoRA ์„ค์ • ์ƒ์„ฑ
140
+
141
+ ```python
142
+ # LoRA ์„ค์ • ์ƒ์„ฑ
143
+ response = requests.post("http://localhost:8001/lora/create-config",
144
+ data={
145
+ "r": 16, # LoRA ๋žญํฌ
146
+ "lora_alpha": 32, # LoRA ์•ŒํŒŒ
147
+ "target_modules": "q_proj,v_proj,k_proj,o_proj", # ํƒ€๊ฒŸ ๋ชจ๋“ˆ
148
+ "lora_dropout": 0.1, # ๋“œ๋กญ์•„์›ƒ
149
+ "bias": "none", # ๋ฐ”์ด์–ด์Šค ์ฒ˜๋ฆฌ
150
+ "task_type": "CAUSAL_LM" # ์ž‘์—… ํƒ€์ž…
151
+ })
152
+ ```
153
+
154
+ #### ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ๋ฐ ์‚ฌ์šฉ
155
+
156
+ ```python
157
+ # LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ
158
+ response = requests.post("http://localhost:8001/lora/apply",
159
+ data={"adapter_name": "my_adapter"})
160
+
161
+ # LoRA ๋ชจ๋ธ๋กœ ํ…์ŠคํŠธ ์ƒ์„ฑ
162
+ response = requests.post("http://localhost:8001/lora/generate",
163
+ data={
164
+ "prompt": "์•ˆ๋…•ํ•˜์„ธ์š”!",
165
+ "max_length": 100,
166
+ "temperature": 0.7
167
+ })
168
+
169
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ
170
+ response = requests.post("http://localhost:8001/lora/save-adapter",
171
+ data={"adapter_name": "my_adapter"})
172
+ ```
173
+
174
+ ### 3. ํ†ตํ•ฉ ์‚ฌ์šฉ (์ปจํ…์ŠคํŠธ + LoRA)
175
+
176
+ ```python
177
+ # ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ
178
+ response = requests.post("http://localhost:8001/generate",
179
+ data={
180
+ "prompt": "์ด์ „ ๋Œ€ํ™”๋ฅผ ์ฐธ๊ณ ํ•ด์„œ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.",
181
+ "use_context": "true",
182
+ "session_id": "session_1"
183
+ })
184
+ ```
185
+
186
+ ## ๐Ÿ” API ์—”๋“œํฌ์ธํŠธ
187
+
188
+ ### ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ
189
+
190
+ | ๋ฉ”์„œ๋“œ | ์—”๋“œํฌ์ธํŠธ | ์„ค๋ช… |
191
+ |--------|------------|------|
192
+ | POST | `/context/set-system-prompt` | ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • |
193
+ | POST | `/context/add-message` | ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ |
194
+ | GET | `/context/get` | ์ปจํ…์ŠคํŠธ ์กฐํšŒ |
195
+ | GET | `/context/summary` | ์ปจํ…์ŠคํŠธ ์š”์•ฝ |
196
+ | POST | `/context/clear` | ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” |
197
+ | DELETE | `/context/message/{message_id}` | ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ |
198
+ | PUT | `/context/message/{message_id}` | ๋ฉ”์‹œ์ง€ ์ˆ˜์ • |
199
+ | GET | `/context/search` | ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰ |
200
+ | POST | `/context/export` | ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ |
201
+ | POST | `/context/import` | ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ |
202
+
203
+ ### LoRA ๊ด€๋ฆฌ
204
+
205
+ | ๋ฉ”์„œ๋“œ | ์—”๋“œํฌ์ธํŠธ | ์„ค๋ช… |
206
+ |--------|------------|------|
207
+ | POST | `/lora/load-base-model` | ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ |
208
+ | POST | `/lora/create-config` | LoRA ์„ค์ • ์ƒ์„ฑ |
209
+ | POST | `/lora/apply` | LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ |
210
+ | POST | `/lora/load-adapter` | ์ €์žฅ๋œ ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ |
211
+ | POST | `/lora/save-adapter` | ์–ด๋Œ‘ํ„ฐ ์ €์žฅ |
212
+ | GET | `/lora/adapters` | ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก |
213
+ | GET | `/lora/stats` | ์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ |
214
+ | POST | `/lora/switch` | ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ |
215
+ | POST | `/lora/unload` | ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ |
216
+ | POST | `/lora/generate` | LoRA ๋ชจ๋ธ๋กœ ์ƒ์„ฑ |
217
+ | POST | `/lora/merge` | ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ |
218
+
219
+ ## ๐Ÿ“Š ์„ฑ๋Šฅ ์ตœ์ ํ™”
220
+
221
+ ### 1. ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
222
+
223
+ - **์ปจํ…์ŠคํŠธ ์••์ถ•**: ์ž๋™ ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™”
224
+ - **ํ† ํฐ ์ œํ•œ**: ์„ค์ • ๊ฐ€๋Šฅํ•œ ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
225
+ - **์„ธ์…˜ ๋ถ„๋ฆฌ**: ๋…๋ฆฝ์ ์ธ ๋ฉ”๋ชจ๋ฆฌ ๊ณต๊ฐ„
226
+
227
+ ### 2. LoRA ์ตœ์ ํ™”
228
+
229
+ - **๋žญํฌ ์กฐ์ •**: r ๊ฐ’์œผ๋กœ ์ •ํ™•๋„์™€ ํšจ์œจ์„ฑ ๊ท ํ˜•
230
+ - **ํƒ€๊ฒŸ ๋ชจ๋“ˆ ์„ ํƒ**: ํ•„์š”ํ•œ ๋ ˆ์ด์–ด๋งŒ ์„ ํƒ์  ํ›ˆ๋ จ
231
+ - **๊ทธ๋ž˜๋””์–ธํŠธ ์ฒดํฌํฌ์ธํŒ…**: ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ๊ฐ์†Œ
232
+
233
+ ### 3. ํ•˜๋“œ์›จ์–ด ์ตœ์ ํ™”
234
+
235
+ - **GPU ๋ฉ”๋ชจ๋ฆฌ**: ํšจ์œจ์ ์ธ ๋ฉ”๋ชจ๋ฆฌ ํ• ๋‹น
236
+ - **CPU ์Šค๋ ˆ๋“œ**: ๋ฉ€ํ‹ฐ์Šค๋ ˆ๋”ฉ ์ตœ์ ํ™”
237
+ - **๋ฐฐ์น˜ ์ฒ˜๋ฆฌ**: ๋Œ€๋Ÿ‰ ๋ฐ์ดํ„ฐ ์ฒ˜๋ฆฌ ์ตœ์ ํ™”
238
+
239
+ ## ๐Ÿงช ํ…Œ์ŠคํŠธ
240
+
241
+ ### ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ ์‹คํ–‰
242
+
243
+ ```bash
244
+ python test_context_lora.py
245
+ ```
246
+
247
+ ### ์ˆ˜๋™ ํ…Œ์ŠคํŠธ
248
+
249
+ ```bash
250
+ # ์„œ๋ฒ„ ์‹œ์ž‘
251
+ python run_server.py
252
+
253
+ # ๋‹ค๋ฅธ ํ„ฐ๋ฏธ๋„์—์„œ ํ…Œ์ŠคํŠธ
254
+ curl -X POST "http://localhost:8001/context/set-system-prompt" \
255
+ -d "prompt=๋‹น์‹ ์€ ํ•œ๊ตญ์–ด AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
256
+
257
+ curl -X GET "http://localhost:8001/context/summary"
258
+ ```
259
+
260
+ ## ๐Ÿ”ง ์„ค์ • ์˜ต์…˜
261
+
262
+ ### ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ •
263
+
264
+ ```python
265
+ # ContextManager ์ดˆ๊ธฐํ™” ์‹œ ์„ค์ •
266
+ context_manager = ContextManager(
267
+ max_tokens=4000, # ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
268
+ max_turns=20, # ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
269
+ strategy="sliding_window" # ์••์ถ• ์ „๋žต
270
+ )
271
+ ```
272
+
273
+ ### LoRA ์„ค์ •
274
+
275
+ ```python
276
+ # LoRA ์„ค์ • ์˜ˆ์‹œ
277
+ lora_config = LoraConfig(
278
+ r=16, # LoRA ๋žญํฌ (๋†’์„์ˆ˜๋ก ์ •ํ™•๋„ ํ–ฅ์ƒ, ๋ฉ”๋ชจ๋ฆฌ ์ฆ๊ฐ€)
279
+ lora_alpha=32, # LoRA ์•ŒํŒŒ (์Šค์ผ€์ผ๋ง ํŒฉํ„ฐ)
280
+ target_modules=["q_proj", "v_proj", "k_proj", "o_proj"], # ํƒ€๊ฒŸ ๋ชจ๋“ˆ
281
+ lora_dropout=0.1, # ๋“œ๋กญ์•„์›ƒ ๋น„์œจ
282
+ bias="none", # ๋ฐ”์ด์–ด์Šค ์ฒ˜๋ฆฌ ๋ฐฉ์‹
283
+ task_type="CAUSAL_LM" # ์ž‘์—… ํƒ€์ž…
284
+ )
285
+ ```
286
+
287
+ ## ๐Ÿšจ ์ฃผ์˜์‚ฌํ•ญ
288
+
289
+ ### 1. ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ
290
+ - **์ปจํ…์ŠคํŠธ ๊ธธ์ด**: ๋„ˆ๋ฌด ๊ธด ์ปจํ…์ŠคํŠธ๋Š” ๋ฉ”๋ชจ๋ฆฌ ๋ถ€์กฑ์„ ์•ผ๊ธฐํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
291
+ - **LoRA ๋žญํฌ**: ๋†’์€ ๋žญํฌ๋Š” ์ •ํ™•๋„๋ฅผ ๋†’์ด์ง€๋งŒ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰๋„ ์ฆ๊ฐ€ํ•ฉ๋‹ˆ๋‹ค
292
+
293
+ ### 2. ์„ฑ๋Šฅ ๊ณ ๋ ค์‚ฌํ•ญ
294
+ - **GPU ๋ฉ”๋ชจ๋ฆฌ**: LoRA ํ›ˆ๋ จ ์‹œ ์ถฉ๋ถ„ํ•œ GPU ๋ฉ”๋ชจ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค
295
+ - **CPU ์‚ฌ์šฉ๋Ÿ‰**: ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹œ CPU ๋ฆฌ์†Œ์Šค๊ฐ€ ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค
296
+
297
+ ### 3. ํ˜ธํ™˜์„ฑ
298
+ - **๋ชจ๋ธ ํƒ€์ž…**: ๋ชจ๋“  ๋ชจ๋ธ์ด LoRA๋ฅผ ์ง€์›ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
299
+ - **๋ฒ„์ „ ํ˜ธํ™˜์„ฑ**: PEFT์™€ Transformers ๋ฒ„์ „ ํ˜ธํ™˜์„ฑ์„ ํ™•์ธํ•˜์„ธ์š”
300
+
301
+ ## ๐Ÿ“š ์ถ”๊ฐ€ ์ž๋ฃŒ
302
+
303
+ ### ๊ด€๋ จ ๋ฌธ์„œ
304
+ - [PEFT ๊ณต์‹ ๋ฌธ์„œ](https://huggingface.co/docs/peft)
305
+ - [LoRA ๋…ผ๋ฌธ](https://arxiv.org/abs/2106.09685)
306
+ - [QLoRA ๋…ผ๋ฌธ](https://arxiv.org/abs/2305.14314)
307
+
308
+ ### ์˜ˆ์ œ ์ฝ”๋“œ
309
+ - `test_context_lora.py`: ํ†ตํ•ฉ ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
310
+ - `examples/`: ์ถ”๊ฐ€ ์‚ฌ์šฉ ์˜ˆ์ œ๋“ค
311
+
312
+ ### ์ปค๋ฎค๋‹ˆํ‹ฐ
313
+ - [Hugging Face PEFT](https://huggingface.co/docs/peft)
314
+ - [GitHub Issues](https://github.com/your-repo/issues)
315
+
316
+ ## ๐Ÿค ๊ธฐ์—ฌํ•˜๊ธฐ
317
+
318
+ ๋ฒ„๊ทธ ๋ฆฌํฌํŠธ, ๊ธฐ๋Šฅ ์ œ์•ˆ, ์ฝ”๋“œ ๊ธฐ์—ฌ๋ฅผ ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค!
319
+
320
+ 1. ์ด์Šˆ ์ƒ์„ฑ
321
+ 2. ํฌํฌ ํ›„ ๋ธŒ๋žœ์น˜ ์ƒ์„ฑ
322
+ 3. ๋ณ€๊ฒฝ์‚ฌํ•ญ ์ปค๋ฐ‹
323
+ 4. Pull Request ์ƒ์„ฑ
324
+
325
+ ## ๐Ÿ“„ ๋ผ์ด์„ ์Šค
326
+
327
+ ์ด ํ”„๋กœ์ ํŠธ๋Š” MIT ๋ผ์ด์„ ์Šค ํ•˜์— ๋ฐฐํฌ๋ฉ๋‹ˆ๋‹ค.
328
+
329
+ ---
330
+
331
+ **Lily LLM** - ๋” ์Šค๋งˆํŠธํ•œ AI ๋Œ€ํ™”๋ฅผ ์œ„ํ•œ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA ์‹œ์Šคํ…œ ๐Ÿš€
download_kanana_model.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Download the Kanana vision-language model and save it locally.

Fetches ``kakaocorp/kanana-1.5-v-3b-instruct`` (processor + model) from the
Hugging Face Hub using the official Auto classes and writes both into
``SAVE_DIRECTORY`` so the server can later load them from disk.

Requires an ``HF_TOKEN`` entry in the environment / ``.env`` file if the
repository is gated; on any failure the full traceback is printed together
with a hint to check the Hugging Face login.
"""
import os
import traceback

import torch
from transformers import AutoModelForVision2Seq, AutoProcessor
from dotenv import load_dotenv

# --- Configuration ---
MODEL_NAME = "kakaocorp/kanana-1.5-v-3b-instruct"
SAVE_DIRECTORY = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
# --- End configuration ---

load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")  # may be None; gated repos will then fail to download

print("=" * 60)
print(f"'{MODEL_NAME}' ๋ชจ๋ธ ๋ฐ ํ”„๋กœ์„ธ์„œ ๊ณต์‹ ๋‹ค์šด๋กœ๋“œ๋ฅผ ์‹œ์ž‘ํ•ฉ๋‹ˆ๋‹ค.")
print(f"์ €์žฅ ๊ฒฝ๋กœ: {SAVE_DIRECTORY}")
print("=" * 60)

try:
    # 1. Download the official AutoProcessor (tokenizer + image processor).
    print("\n[1/2] ํ”„๋กœ์„ธ์„œ(Tokenizer+Image Processor) ๋‹ค์šด๋กœ๋“œ ์ค‘...")
    processor = AutoProcessor.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
        trust_remote_code=True
    )
    print("โœ… ํ”„๋กœ์„ธ์„œ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # 2. Download the official AutoModelForVision2Seq weights.
    print("\n[2/2] ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘... (์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค)")
    model = AutoModelForVision2Seq.from_pretrained(
        MODEL_NAME,
        token=HF_TOKEN,
        torch_dtype=torch.bfloat16,  # same dtype as the official example
        trust_remote_code=True
    )
    print("โœ… ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # 3. Save both model and processor to the local path.
    print(f"\n[+] '{SAVE_DIRECTORY}' ๊ฒฝ๋กœ์— ๋ชจ๋“  ํŒŒ์ผ ์ €์žฅ ์ค‘...")
    # exist_ok=True replaces the racy "check then create" pattern: it is safe
    # whether or not the directory (or part of its parents) already exists.
    os.makedirs(SAVE_DIRECTORY, exist_ok=True)

    processor.save_pretrained(SAVE_DIRECTORY)
    model.save_pretrained(SAVE_DIRECTORY)
    print("โœ… ๋ชจ๋ธ๊ณผ ํ”„๋กœ์„ธ์„œ ์ €์žฅ ์™„๋ฃŒ!")

    print("\n" + "=" * 60)
    print("๐ŸŽ‰ ๋ชจ๋“  ์ž‘์—…์ด ์„ฑ๊ณต์ ์œผ๋กœ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
    print("์ด์ œ `kanana_1_5_v_3b_instruct.py` ํŒŒ์ผ์„ ์ˆ˜์ •ํ•˜๊ณ  ์„œ๋ฒ„๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
    print("=" * 60)

except Exception as e:
    # Broad catch is intentional for a one-shot CLI script: report and exit.
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    traceback.print_exc()
    print("\nํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์ด ์˜ฌ๋ฐ”๋ฅธ์ง€, `huggingface-cli login`์„ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
lily_llm_api/app_v2.py CHANGED
@@ -21,6 +21,10 @@ import os
21
  import json
22
  from pathlib import Path
23
 
 
 
 
 
24
  from .models import get_model_profile, list_available_models
25
  from lily_llm_core.rag_processor import rag_processor
26
  from lily_llm_core.document_processor import document_processor
@@ -45,10 +49,21 @@ from lily_llm_core.vector_store_manager import vector_store_manager
45
  # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
46
 
47
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
48
- # from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
49
-
50
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
51
- logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # FastAPI ์•ฑ ์ƒ์„ฑ
54
  app = FastAPI(
@@ -160,6 +175,7 @@ class TokenResponse(BaseModel):
160
  # ์ „์—ญ ๋ณ€์ˆ˜
161
  model = None
162
  tokenizer = None
 
163
  current_profile = None
164
  model_loaded = False
165
  image_processor = None
@@ -240,66 +256,40 @@ async def load_model_async(model_id: str):
240
  await loop.run_in_executor(executor, load_model_sync, model_id)
241
 
242
  def load_model_sync(model_id: str):
243
- """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ"""
244
- global model, tokenizer, image_processor, current_profile
245
 
246
  try:
247
- # ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ (๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ)
248
  if model is not None:
249
  logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
250
  del model
251
  del tokenizer
252
- model = None
253
- tokenizer = None
254
  import gc
255
  gc.collect()
256
  logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
257
 
258
  logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
259
  current_profile = get_model_profile(model_id)
260
- model, tokenizer = current_profile.load_model()
261
 
262
- # ๋ชจ๋ธ ์ •๋ณด ๋””๋ฒ„๊ทธ
263
- if hasattr(model, 'num_parameters'):
264
- logger.info(f"๐Ÿ“Š ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ˆ˜: {model.num_parameters():,}")
265
-
266
- # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ† ํฌ๋‚˜์ด์ € ์„ค์ •
267
- if getattr(current_profile, 'multimodal', False):
268
- logger.info("๐Ÿ”ง ํ† ํฌ๋‚˜์ด์ € ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ธฐ๋Šฅ ํ™œ์„ฑํ™”...")
269
- tokenizer.mllm_setup(num_visual_tokens=1)
270
- from transformers import AutoImageProcessor
271
-
272
- # ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ ๋ถ„๊ธฐ
273
- if current_profile.is_local:
274
- # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ
275
- logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ")
276
- image_processor = AutoImageProcessor.from_pretrained(
277
- current_profile.local_path,
278
- trust_remote_code=True,
279
- local_files_only=True,
280
- )
281
- else:
282
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ, HF ํ† ํฐ ํ•„์š”
283
- logger.info("โ˜๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋”ฉ")
284
- import os
285
- HF_TOKEN = os.getenv("HF_TOKEN")
286
- image_processor = AutoImageProcessor.from_pretrained(
287
- current_profile.model_name, # local_path ๋Œ€์‹  model_name ์‚ฌ์šฉ
288
- token=HF_TOKEN,
289
- trust_remote_code=True,
290
- local_files_only=False, # ์„œ๋ฒ„์—์„œ๋Š” False
291
- )
292
- logger.info("โœ… ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋“œ ์„ฑ๊ณต!")
293
  else:
294
- image_processor = None
295
-
 
296
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
297
 
298
  except Exception as e:
299
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
300
  import traceback
301
  logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
302
- raise
303
 
304
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
305
  temperature: Optional[float] = None, top_p: Optional[float] = None,
@@ -474,324 +464,172 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
474
  logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
475
  raise HTTPException(status_code=500, detail=f"์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
476
 
 
 
 
 
 
 
 
 
 
 
477
  @app.post("/generate", response_model=GenerateResponse)
478
  async def generate(prompt: str = Form(...),
479
  image1: UploadFile = File(None),
480
  image2: UploadFile = File(None),
481
  image3: UploadFile = File(None),
482
  image4: UploadFile = File(None),
483
- max_length: Optional[int] = Form(None),
484
- temperature: Optional[float] = Form(None),
485
- top_p: Optional[float] = Form(None),
486
- do_sample: Optional[bool] = Form(None)):
487
- """[ํ†ตํ•ฉ ์—”๋“œํฌ์ธํŠธ] ํ…์ŠคํŠธ ๋ฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ"""
488
  if not model_loaded:
489
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
490
 
491
  start_time = time.time()
492
- loop = asyncio.get_event_loop()
493
-
494
- # ๋‹ค์ค‘ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
495
- image_data_list = []
496
- for img in [image1, image2, image3, image4]:
497
- if img:
498
- image_data = await img.read()
499
- image_data_list.append(image_data)
500
-
501
- # ๋‹จ์ผ ์‹คํ–‰ ๋ณด์žฅ: generate_sync๋Š” ์˜ค์ง ํ•œ ๋ฒˆ๋งŒ ํ˜ธ์ถœ
502
- result = await loop.run_in_executor(
503
- executor,
504
- generate_sync,
505
- prompt,
506
- image_data_list,
507
- max_length,
508
- temperature,
509
- top_p,
510
- do_sample,
511
- )
512
-
513
- processing_time = time.time() - start_time
514
- logger.info(f"โœ… ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ), ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {result['image_processed']}")
515
-
516
- return GenerateResponse(
517
- generated_text=result["text"],
518
- processing_time=processing_time,
519
- model_name=current_profile.display_name,
520
- image_processed=result["image_processed"]
521
- )
522
 
523
- @app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
524
- async def generate_multimodal(
525
- prompt: str = Form(...),
526
- image: UploadFile = File(None),
527
- model_id: Optional[str] = Form(None),
528
- max_length: Optional[int] = Form(None),
529
- temperature: Optional[float] = Form(None),
530
- top_p: Optional[float] = Form(None),
531
- do_sample: Optional[bool] = Form(None)
532
- ):
533
- """๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ…์ŠคํŠธ ์ƒ์„ฑ (์ด๋ฏธ์ง€ + ํ…์ŠคํŠธ)"""
534
- start_time = time.time()
535
-
536
- try:
537
- # ๋ชจ๋ธ ๋กœ๋“œ ํ™•์ธ
538
- if not model_loaded or not model or not tokenizer or not current_profile:
539
- raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
540
-
541
- # Kanana ๋ชจ๋ธ์ด ์•„๋‹ˆ๋ฉด ์ผ๋ฐ˜ ํ…์ŠคํŠธ ์ƒ์„ฑ์œผ๋กœ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ
542
- if "kanana" not in current_profile.model_name.lower():
543
- logger.warning("๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์š”์ฒญ์ด์ง€๋งŒ Kanana ๋ชจ๋ธ์ด ์•„๋‹˜ - ์ผ๋ฐ˜ ํ…์ŠคํŠธ ์ƒ์„ฑ์œผ๋กœ ์ฒ˜๋ฆฌ")
544
- # ์ผ๋ฐ˜ generate ์—”๋“œํฌ์ธํŠธ๋กœ ๋ฆฌ๋‹ค์ด๋ ‰ํŠธ
545
- loop = asyncio.get_event_loop()
546
- result = await loop.run_in_executor(executor, generate_sync, prompt, None,
547
- max_length, temperature, top_p, do_sample)
548
- return MultimodalGenerateResponse(
549
- generated_text=result["text"],
550
- processing_time=time.time() - start_time,
551
- model_name=current_profile.display_name,
552
- model_id=model_id or current_profile.get_model_info()["model_name"],
553
- image_processed=False
554
- )
555
-
556
- logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹œ์ž‘: '{prompt}'")
557
-
558
- # ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
559
- pixel_values = None
560
- image_metas = None
561
- image_processed = False
562
- if image:
563
- logger.info(f"๐Ÿ“ธ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘: {image.filename}")
564
  try:
565
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฝ๊ธฐ
566
- image_data = await image.read()
567
- pil_image = Image.open(io.BytesIO(image_data))
568
-
569
- # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ (Kanana ๋ชจ๋ธ์— ๋งž๊ฒŒ)
570
- logger.info(f"โœ… ์ด๋ฏธ์ง€ ๋กœ๋“œ ์™„๋ฃŒ: {pil_image.size}")
571
-
572
- # Kanana ๋ชจ๋ธ์˜ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
573
- from transformers import AutoImageProcessor
574
-
575
- # ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ๋กœ๋“œ
576
- image_processor = AutoImageProcessor.from_pretrained(
577
- current_profile.get_model_info()["local_path"],
578
- trust_remote_code=True
579
- )
580
-
581
- # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ (Kanana ๋ฐฉ์‹)
582
- processed_images = image_processor(pil_image)
583
- pixel_values = processed_images["pixel_values"]
584
- image_metas = processed_images["image_meta"]
585
-
586
- # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
587
- pixel_values = pixel_values.to(model.device)
588
- if image_metas and "vision_grid_thw" in image_metas:
589
- # vision_grid_thw๊ฐ€ ์Šค์นผ๋ผ๊ฐ€ ์•„๋‹Œ ํ…์„œ์ธ์ง€ ํ™•์ธ
590
- grid_thw = image_metas["vision_grid_thw"]
591
- if isinstance(grid_thw, (list, tuple)):
592
- grid_thw = torch.tensor(grid_thw)
593
- elif not isinstance(grid_thw, torch.Tensor):
594
- grid_thw = torch.tensor([grid_thw])
595
- image_metas["vision_grid_thw"] = grid_thw.to(model.device)
596
-
597
- # ๋””๋ฒ„๊น…์„ ์œ„ํ•œ ๋กœ๊ทธ ์ถ”๊ฐ€
598
- logger.info(f"๐Ÿ” pixel_values ํ˜•ํƒœ: {pixel_values.shape}")
599
- logger.info(f"๐Ÿ” image_metas keys: {list(image_metas.keys()) if image_metas else 'None'}")
600
- if image_metas and "vision_grid_thw" in image_metas:
601
- logger.info(f"๐Ÿ” vision_grid_thw ํ˜•ํƒœ: {image_metas['vision_grid_thw'].shape}")
602
-
603
- image_processed = True
604
- logger.info(f"โœ… ์ด๋ฏธ์ง€ ํ…์„œ ๋ณ€ํ™˜ ์™„๋ฃŒ: {pixel_values.shape}")
605
-
606
  except Exception as e:
607
- logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
608
- pixel_values = None
609
- image_metas = None
610
- image_processed = False
611
- logger.info("๐Ÿ“ ์ด๋ฏธ์ง€ ์—†์ด ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
612
- else:
613
- logger.info("๐Ÿ“ธ ์ด๋ฏธ์ง€ ์—†์Œ - ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ")
614
- image_processed = False
615
-
616
- # Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹
617
- system_prompt = "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ์‘๋‹ต์€ ํ•œ๊ตญ์–ด๋กœ ํ•ด์ฃผ์„ธ์š”."
618
- if image_processed:
619
- formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
620
- logger.info(f"๐Ÿ–ผ๏ธ Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")
621
  else:
622
- formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
623
- logger.info(f"๐Ÿ” Kanana ํ…์ŠคํŠธ ์ „์šฉ ํ”„๋กฌํ”„ํŠธ: '{formatted_prompt}'")
624
-
625
- # ํ† ํฌ๋‚˜์ด์ง•
626
- inputs = tokenizer(
627
- formatted_prompt,
628
- return_tensors="pt",
629
- padding=True,
630
- truncation=True,
631
- max_length=100,
632
- )
633
-
634
- if 'token_type_ids' in inputs:
635
- del inputs['token_type_ids']
636
-
637
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
638
- logger.info(f"์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {inputs['input_ids'].shape[1]}")
639
-
640
- # Kanana์šฉ ์ƒ์„ฑ ์„ค์ •
641
- max_new_tokens = max_length or 100
642
- temperature = temperature or 0.7
643
- top_p = top_p or 0.9
644
- do_sample = do_sample if do_sample is not None else True
645
-
646
- with torch.no_grad():
647
- if image_processed and pixel_values is not None:
648
- # Kanana ๋ชจ๋ธ์˜ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ๋„
649
- logger.info("๐Ÿ” Kanana ๋ชจ๋ธ์˜ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ๋„...")
650
-
651
- try:
652
- # vision_grid_thw๋ฅผ ์˜ฌ๋ฐ”๋ฅธ ํ˜•ํƒœ๋กœ ๋ณ€ํ™˜ ์‹œ๋„
653
- if 'vision_grid_thw' in image_metas:
654
- grid_thw = image_metas['vision_grid_thw']
655
- if isinstance(grid_thw, (list, tuple)):
656
- grid_thw = torch.tensor(grid_thw)
657
- elif not isinstance(grid_thw, torch.Tensor):
658
- grid_thw = torch.tensor([grid_thw])
659
- image_metas['vision_grid_thw'] = grid_thw.to(model.device)
660
-
661
- # forward_vision๊ณผ forward_projector๋ฅผ ๋ถ„๋ฆฌํ•ด์„œ ์‹œ๋„
662
- visual_features = model.forward_vision(pixel_values, image_metas)
663
- visual_embeds = model.forward_projector(visual_features, image_metas)
664
-
665
- # ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ
666
- text_embeds = model.embed_text_tokens(inputs["input_ids"])
667
-
668
- # ์‹œ๊ฐ์  ์ž„๋ฒ ๋”ฉ์„ ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ๊ณผ ๊ฐ™์€ dtype์œผ๋กœ ๋ณ€ํ™˜
669
- target_dtype = text_embeds.dtype
670
- visual_embeds_converted = visual_embeds.to(target_dtype)
671
-
672
- # ํ…์ŠคํŠธ ์ž„๋ฒ ๋”ฉ์„ ํ‰๋ฉดํ™”
673
- from einops import rearrange
674
- flattened_text_embeds = rearrange(text_embeds, "b l d -> (b l) d")
675
- flattened_input_ids = rearrange(inputs["input_ids"], "b l -> (b l)")
676
-
677
- # -1 ํ† ํฐ ์œ„์น˜์— ์‹œ๊ฐ์  ์ž„๋ฒ ๋”ฉ ์‚ฝ์ž…
678
- mask = (flattened_input_ids == -1)
679
- if mask.sum() > 0:
680
- flattened_text_embeds[mask] = visual_embeds_converted[:mask.sum()]
681
-
682
- # ๋‹ค์‹œ ๋ฐฐ์น˜ ํ˜•ํƒœ๋กœ ์žฌ๊ตฌ์„ฑ
683
- input_embeds = rearrange(flattened_text_embeds, "(b l) d -> b l d", b=inputs["input_ids"].shape[0])
684
- attention_mask = inputs["attention_mask"]
685
-
686
- # ์–ธ์–ด ๋ชจ๋ธ์˜ dtype์— ๋งž์ถฐ input_embeds ๋ณ€ํ™˜
687
- language_model_dtype = next(model.language_model.parameters()).dtype
688
- if input_embeds.dtype != language_model_dtype:
689
- input_embeds = input_embeds.to(language_model_dtype)
690
-
691
- # Kanana ๋ชจ๋ธ์˜ ์›๋ž˜ generate ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
692
- outputs = model.generate(
693
- input_ids=inputs["input_ids"],
694
- pixel_values=pixel_values,
695
- image_metas=image_metas,
696
- attention_mask=inputs["attention_mask"],
697
- max_new_tokens=max_new_tokens,
698
- do_sample=do_sample,
699
- temperature=temperature,
700
- top_k=40,
701
- top_p=top_p,
702
- repetition_penalty=1.1,
703
- no_repeat_ngram_size=2,
704
- pad_token_id=tokenizer.eos_token_id,
705
- eos_token_id=tokenizer.eos_token_id,
706
- use_cache=True
707
- )
708
- logger.info("โœ… ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์„ฑ๊ณต!")
709
-
710
- except Exception as e:
711
- logger.error(f"โŒ ์‹ค์ œ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
712
- logger.error(f"๐Ÿ” ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
713
- import traceback
714
- logger.error(f"๐Ÿ” ์ƒ์„ธ ์˜ค๋ฅ˜: {traceback.format_exc()}")
715
- logger.info("๐Ÿ”„ fallback: ํ…์ŠคํŠธ ์ „์šฉ ์ฒ˜๋ฆฌ๋กœ ์ „ํ™˜")
716
-
717
- # fallback: ํ…์ŠคํŠธ ์ „์šฉ ์ฒ˜๋ฆฌ
718
- enhanced_prompt = f"[์ด๋ฏธ์ง€ ์ฒจ๋ถ€๋จ] {prompt}"
719
- enhanced_formatted_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{enhanced_prompt}<|im_end|>\n<|im_start|>assistant\n"
720
-
721
- enhanced_inputs = tokenizer(
722
- enhanced_formatted_prompt,
723
- return_tensors="pt",
724
- padding=True,
725
- truncation=True,
726
- max_length=256
727
- )
728
-
729
- if 'token_type_ids' in enhanced_inputs:
730
- del enhanced_inputs['token_type_ids']
731
-
732
- enhanced_inputs = {k: v.to(model.device) for k, v in enhanced_inputs.items()}
733
-
734
- outputs = model.language_model.generate(
735
- input_ids=enhanced_inputs["input_ids"],
736
- attention_mask=enhanced_inputs["attention_mask"],
737
- max_new_tokens=max_new_tokens,
738
- do_sample=do_sample,
739
- temperature=temperature,
740
- top_k=40,
741
- top_p=top_p,
742
- repetition_penalty=1.1,
743
- no_repeat_ngram_size=2,
744
- pad_token_id=tokenizer.eos_token_id,
745
- eos_token_id=tokenizer.eos_token_id,
746
- use_cache=True
747
- )
748
  else:
749
- # ํ…์ŠคํŠธ ์ „์šฉ ์ƒ์„ฑ
750
- logger.info("๐Ÿ“ ํ…์ŠคํŠธ ์ „์šฉ ์ƒ์„ฑ ์‹œ์ž‘")
751
- outputs = model.generate(
752
- input_ids=inputs["input_ids"],
753
- attention_mask=inputs["attention_mask"],
754
- max_new_tokens=max_new_tokens,
755
- do_sample=do_sample,
756
- temperature=temperature,
757
- top_k=40,
758
- top_p=top_p,
759
- repetition_penalty=1.1,
760
- no_repeat_ngram_size=2,
761
- pad_token_id=tokenizer.eos_token_id,
762
- eos_token_id=tokenizer.eos_token_id,
763
- use_cache=True
764
- )
765
-
766
- # ์‘๋‹ต ์ถ”์ถœ
767
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
768
- logger.info(f"์ƒ์„ฑ๋œ ํ† ํฐ ์ˆ˜: {outputs.shape[1]}")
769
- logger.info(f"๋””์ฝ”๋”ฉ๋œ ์ „์ฒด ํ…์ŠคํŠธ: '{generated_text}'")
770
-
771
- # Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์‘๋‹ต ์ถ”์ถœ
772
  if "<|im_start|>assistant" in generated_text:
773
  response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
774
- logger.info(f"๐Ÿ” Kanana ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์‘๋‹ต ์ถ”์ถœ: '{response}'")
775
  else:
776
  response = generated_text.strip()
777
- logger.info(f"๐Ÿ” Kanana ์ „์ฒด ํ…์ŠคํŠธ: '{response}'")
778
-
779
  processing_time = time.time() - start_time
780
- logger.info(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์™„๋ฃŒ: {processing_time:.2f}์ดˆ, ํ…์ŠคํŠธ ๊ธธ์ด: {len(response)}, ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ: {image_processed}")
781
-
782
- return MultimodalGenerateResponse(
783
- generated_text=response,
784
- processing_time=processing_time,
785
- model_name=current_profile.display_name,
786
- model_id=model_id or current_profile.get_model_info()["model_name"],
787
- image_processed=image_processed
788
- )
789
-
790
  except Exception as e:
791
- processing_time = time.time() - start_time
792
- logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e} (์†Œ์š” ์‹œ๊ฐ„: {processing_time:.2f}์ดˆ)")
793
  raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
794
 
 
 
 
795
  @app.get("/models")
796
  async def list_models():
797
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก"""
@@ -2106,6 +1944,462 @@ async def get_latex_ocr_faiss_status():
2106
  logger.error(f"์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
2107
  return {"status": "error", "error": str(e)}
2108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2109
  # ============================================================================
2110
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2111
  # ============================================================================
@@ -2168,7 +2462,7 @@ async def generate_hybrid_rag_response(
2168
  use_text: bool = Form(True),
2169
  use_image: bool = Form(True),
2170
  use_latex: bool = Form(True),
2171
- use_latex_ocr: bool = Form(True),
2172
  max_length: Optional[int] = Form(None),
2173
  temperature: Optional[float] = Form(None),
2174
  top_p: Optional[float] = Form(None),
@@ -2220,7 +2514,7 @@ async def get_hybrid_rag_status():
2220
  "text_rag_available": True,
2221
  "image_rag_available": True,
2222
  "latex_rag_available": True,
2223
- "latex_ocr_faiss_available": hybrid_rag_processor.latex_ocr_faiss_integrated is not None,
2224
  "status": "ready"
2225
  }
2226
  except Exception as e:
 
21
  import json
22
  from pathlib import Path
23
 
24
+ # logging ์„ค์ •์„ ๋จผ์ € ๊ตฌ์„ฑ
25
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
+ logger = logging.getLogger(__name__)
27
+
28
  from .models import get_model_profile, list_available_models
29
  from lily_llm_core.rag_processor import rag_processor
30
  from lily_llm_core.document_processor import document_processor
 
49
  # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
50
 
51
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
52
+ from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
53
+
54
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ LoRA ๊ด€๋ฆฌ์ž ์ถ”๊ฐ€
55
+ from lily_llm_core.context_manager import get_context_manager, context_manager
56
+
57
+ # LoRA ๊ด€๋ฆฌ์ž import (์„ ํƒ์ )
58
+ try:
59
+ from lily_llm_core.lora_manager import get_lora_manager, lora_manager
60
+ LORA_AVAILABLE = True
61
+ logger.info("โœ… LoRA ๊ด€๋ฆฌ์ž import ์„ฑ๊ณต")
62
+ except ImportError as e:
63
+ logger.warning(f"โš ๏ธ LoRA ๊ด€๋ฆฌ์ž import ์‹คํŒจ: {e}")
64
+ LORA_AVAILABLE = False
65
+ lora_manager = None
66
+ get_lora_manager = None
67
 
68
  # FastAPI ์•ฑ ์ƒ์„ฑ
69
  app = FastAPI(
 
175
  # ์ „์—ญ ๋ณ€์ˆ˜
176
  model = None
177
  tokenizer = None
178
+ processor = None
179
  current_profile = None
180
  model_loaded = False
181
  image_processor = None
 
256
  await loop.run_in_executor(executor, load_model_sync, model_id)
257
 
258
  def load_model_sync(model_id: str):
259
+ """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ (์ตœ์ข… ์ˆ˜์ •๋ณธ)"""
260
+ global model, tokenizer, processor, current_profile
261
 
262
  try:
 
263
  if model is not None:
264
  logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
265
  del model
266
  del tokenizer
267
+ del processor
268
+ model, tokenizer, processor = None, None, None
269
  import gc
270
  gc.collect()
271
  logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
272
 
273
  logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
274
  current_profile = get_model_profile(model_id)
 
275
 
276
+ # ์ด์ œ load_model์€ (model, processor)๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
277
+ model, processor = current_profile.load_model()
278
+
279
+ # processor์—์„œ tokenizer๋ฅผ ๊บผ๋‚ด ์ „์—ญ ๋ณ€์ˆ˜์— ํ• ๋‹นํ•ฉ๋‹ˆ๋‹ค.
280
+ if hasattr(processor, 'tokenizer'):
281
+ tokenizer = processor.tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  else:
283
+ # processor ์ž์ฒด๊ฐ€ tokenizer ์—ญํ• ๋„ ํ•  ์ˆ˜ ์žˆ๋Š” ๊ฒฝ์šฐ
284
+ tokenizer = processor
285
+
286
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
287
 
288
  except Exception as e:
289
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
290
  import traceback
291
  logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
292
+ raise
293
 
294
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
295
  temperature: Optional[float] = None, top_p: Optional[float] = None,
 
464
  logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
465
  raise HTTPException(status_code=500, detail=f"์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}")
466
 
467
+ # Patched version of /generate and /generate-multimodal with manual greedy decoding loop for Kanana
468
+
469
+ from fastapi import HTTPException, UploadFile, File, Form
470
+ from PIL import Image
471
+ import io
472
+ import time
473
+ import torch
474
+ from typing import Optional
475
+
476
+
477
  @app.post("/generate", response_model=GenerateResponse)
478
  async def generate(prompt: str = Form(...),
479
  image1: UploadFile = File(None),
480
  image2: UploadFile = File(None),
481
  image3: UploadFile = File(None),
482
  image4: UploadFile = File(None),
483
+ use_context: bool = Form(True),
484
+ session_id: str = Form(None)):
485
+ global model_loaded, current_profile, model, tokenizer, processor
486
+
 
487
  if not model_loaded:
488
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
489
 
490
  start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
+ if use_context:
493
+ context_manager.add_user_message(prompt, metadata={"session_id": session_id})
494
+
495
+ pil_images = []
496
+ for img_file in [image1, image2, image3, image4]:
497
+ if img_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  try:
499
+ data = await img_file.read()
500
+ pil = Image.open(io.BytesIO(data)).convert("RGB")
501
+ pil_images.append(pil)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  except Exception as e:
503
+ logger.warning(f"์ด๋ฏธ์ง€ ๋กœ๋“œ ์‹คํŒจ: {e}")
504
+
505
+ try:
506
+ if getattr(current_profile, 'multimodal', False) and pil_images:
507
+ image_tokens = " ".join(["<image>"] * len(pil_images))
508
+ sample = {"image": pil_images,
509
+ "conv": [{"role": "user", "content": image_tokens},
510
+ {"role": "user", "content": prompt}]}
511
+ inputs = processor.batch_encode_collate([sample], padding_side="left", add_generation_prompt=True)
512
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
 
 
 
 
513
  else:
514
+ formatted_prompt = current_profile.format_prompt(prompt) if hasattr(current_profile, 'format_prompt') else prompt
515
+ sample = {"image": [], "conv": [{"role": "user", "content": formatted_prompt}]}
516
+ inputs = processor.batch_encode_collate([sample], padding_side='left', add_generation_prompt=True)
517
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
518
+
519
+ eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
520
+
521
+ # Manual greedy decoding loop
522
+ generated = inputs["input_ids"].clone()
523
+ for _ in range(64):
524
+ with torch.no_grad():
525
+ out = model(**inputs)
526
+ next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
527
+ generated = torch.cat([generated, next_token], dim=-1)
528
+ logger.info(f"Step token: {next_token.item()}")
529
+ if next_token.item() == eot_id:
530
+ break
531
+ inputs["input_ids"] = generated
532
+
533
+ logger.info(f"Final Generated IDs: {generated[0].tolist()}")
534
+
535
+ generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
536
+ response = current_profile.extract_response(generated_text, prompt)
537
+
538
+ if use_context:
539
+ context_manager.add_assistant_message(response, metadata={"session_id": session_id})
540
+
541
+ processing_time = time.time() - start_time
542
+ return GenerateResponse(generated_text=response,
543
+ processing_time=processing_time,
544
+ model_name=current_profile.display_name,
545
+ image_processed=len(pil_images) > 0)
546
+ except Exception as e:
547
+ logger.error(f"โŒ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
548
+ raise HTTPException(status_code=500, detail=f"๋ชจ๋ธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
549
+
550
+
551
+ @app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
552
+ async def generate_multimodal(prompt: str = Form(...),
553
+ image: UploadFile = File(None),
554
+ model_id: Optional[str] = Form(None),
555
+ max_length: Optional[int] = Form(None),
556
+ temperature: Optional[float] = Form(None),
557
+ top_p: Optional[float] = Form(None),
558
+ do_sample: Optional[bool] = Form(None)):
559
+ global model_loaded, current_profile, model, tokenizer, processor
560
+
561
+ if not model_loaded:
562
+ raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
563
+
564
+ start_time = time.time()
565
+
566
+ pil_image = None
567
+ if image:
568
+ try:
569
+ data = await image.read()
570
+ pil_image = Image.open(io.BytesIO(data)).convert("RGB")
571
+ except Exception as e:
572
+ logger.error(f"์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
573
+
574
+ try:
575
+ image_list = [pil_image] if pil_image else []
576
+ image_tokens = " ".join(["<image>"] * len(image_list)) if image_list else ""
577
+ conv = []
578
+ if image_list:
579
+ conv.append({"role": "user", "content": image_tokens})
580
+ conv.append({"role": "user", "content": prompt})
581
+
582
+ logger.info("=== STEP 1: building sample ===")
583
+ sample = {"image": [], "conv": [{"role": "user", "content": prompt}]}
584
+
585
+ logger.info("=== STEP 2: calling processor ===")
586
+ inputs = processor.batch_encode_collate([sample], padding_side='left', add_generation_prompt=True)
587
+ logger.info("=== STEP 3: processor returned ===")
588
+
589
+ for k, v in inputs.items():
590
+ if isinstance(v, torch.Tensor):
591
+ logger.info(f"Key {k}: tensor shape {v.shape}, dtype {v.dtype}, device {v.device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  else:
593
+ logger.info(f"Key {k}: {type(v)}")
594
+
595
+ logger.info("=== STEP 4: moving to device ===")
596
+ inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
597
+ logger.info("=== STEP 5: moved to device ===")
598
+ eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
599
+
600
+ # Manual greedy decoding loop
601
+ generated = inputs["input_ids"].clone()
602
+ for _ in range(64):
603
+ with torch.no_grad():
604
+ out = model(**inputs)
605
+ next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
606
+ generated = torch.cat([generated, next_token], dim=-1)
607
+ logger.info(f"Step token: {next_token.item()}")
608
+ if next_token.item() == eot_id:
609
+ break
610
+ inputs["input_ids"] = generated
611
+
612
+ logger.info(f"Final Generated IDs: {generated[0].tolist()}")
613
+
614
+ generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
 
615
  if "<|im_start|>assistant" in generated_text:
616
  response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
 
617
  else:
618
  response = generated_text.strip()
619
+
 
620
  processing_time = time.time() - start_time
621
+ return MultimodalGenerateResponse(generated_text=response,
622
+ processing_time=processing_time,
623
+ model_name=current_profile.display_name,
624
+ model_id=model_id or current_profile.get_model_info().get("model_name"),
625
+ image_processed=bool(pil_image))
 
 
 
 
 
626
  except Exception as e:
627
+ logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
 
628
  raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
629
 
630
+
631
+
632
+
633
  @app.get("/models")
634
  async def list_models():
635
  """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก"""
 
1944
  logger.error(f"์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
1945
  return {"status": "error", "error": str(e)}
1946
 
1947
+ # ============================================================================
1948
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
1949
+ # ============================================================================
1950
+
1951
+ @app.post("/context/set-system-prompt")
1952
+ async def set_system_prompt(prompt: str = Form(...)):
1953
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •"""
1954
+ try:
1955
+ context_manager.set_system_prompt(prompt)
1956
+ return {
1957
+ "success": True,
1958
+ "message": "์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์„ค์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
1959
+ "prompt_length": len(prompt)
1960
+ }
1961
+ except Exception as e:
1962
+ logger.error(f"โŒ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • ์‹คํŒจ: {e}")
1963
+ return {"success": False, "error": str(e)}
1964
+
1965
+ @app.post("/context/add-message")
1966
+ async def add_context_message(
1967
+ role: str = Form(...), # 'user' ๋˜๋Š” 'assistant'
1968
+ content: str = Form(...),
1969
+ message_id: str = Form(None),
1970
+ metadata: str = Form("{}") # JSON ๋ฌธ์ž์—ด
1971
+ ):
1972
+ """์ปจํ…์ŠคํŠธ์— ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
1973
+ try:
1974
+ import json
1975
+ metadata_dict = json.loads(metadata) if metadata else {}
1976
+
1977
+ if role == "user":
1978
+ msg_id = context_manager.add_user_message(content, message_id, metadata_dict)
1979
+ elif role == "assistant":
1980
+ msg_id = context_manager.add_assistant_message(content, message_id, metadata_dict)
1981
+ else:
1982
+ return {"success": False, "error": "์ž˜๋ชป๋œ ์—ญํ• ์ž…๋‹ˆ๋‹ค. 'user' ๋˜๋Š” 'assistant'๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”."}
1983
+
1984
+ return {
1985
+ "success": True,
1986
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ปจํ…์ŠคํŠธ์— ์ถ”๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
1987
+ "message_id": msg_id,
1988
+ "context_summary": context_manager.get_context_summary()
1989
+ }
1990
+ except Exception as e:
1991
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์‹คํŒจ: {e}")
1992
+ return {"success": False, "error": str(e)}
1993
+
1994
+ @app.get("/context/get")
1995
+ async def get_context(
1996
+ include_system: bool = True,
1997
+ max_length: Optional[int] = None,
1998
+ recent_turns: Optional[int] = None
1999
+ ):
2000
+ """ํ˜„์žฌ ์ปจํ…์ŠคํŠธ ์กฐํšŒ"""
2001
+ try:
2002
+ if recent_turns:
2003
+ context = context_manager.get_recent_context(recent_turns)
2004
+ else:
2005
+ context = context_manager.get_context(include_system, max_length)
2006
+
2007
+ return {
2008
+ "success": True,
2009
+ "context": context,
2010
+ "context_summary": context_manager.get_context_summary(),
2011
+ "memory_efficiency": context_manager.get_memory_efficiency()
2012
+ }
2013
+ except Exception as e:
2014
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์กฐํšŒ ์‹คํŒจ: {e}")
2015
+ return {"success": False, "error": str(e)}
2016
+
2017
+ @app.get("/context/summary")
2018
+ async def get_context_summary():
2019
+ """์ปจํ…์ŠคํŠธ ์š”์•ฝ ์ •๋ณด ์กฐํšŒ"""
2020
+ try:
2021
+ return {
2022
+ "success": True,
2023
+ "summary": context_manager.get_context_summary(),
2024
+ "memory_efficiency": context_manager.get_memory_efficiency()
2025
+ }
2026
+ except Exception as e:
2027
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์š”์•ฝ ์กฐํšŒ ์‹คํŒจ: {e}")
2028
+ return {"success": False, "error": str(e)}
2029
+
2030
+ @app.post("/context/clear")
2031
+ async def clear_context():
2032
+ """์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”"""
2033
+ try:
2034
+ context_manager.clear_context()
2035
+ return {
2036
+ "success": True,
2037
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
2038
+ }
2039
+ except Exception as e:
2040
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
2041
+ return {"success": False, "error": str(e)}
2042
+
2043
+ @app.delete("/context/message/{message_id}")
2044
+ async def remove_context_message(message_id: str):
2045
+ """์ปจํ…์ŠคํŠธ์—์„œ ํŠน์ • ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ"""
2046
+ try:
2047
+ success = context_manager.remove_message(message_id)
2048
+ if success:
2049
+ return {
2050
+ "success": True,
2051
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ œ๊ฑฐ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2052
+ "context_summary": context_manager.get_context_summary()
2053
+ }
2054
+ else:
2055
+ return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
2056
+ except Exception as e:
2057
+ logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ ์‹คํŒจ: {e}")
2058
+ return {"success": False, "error": str(e)}
2059
+
2060
+ @app.put("/context/message/{message_id}")
2061
+ async def edit_context_message(
2062
+ message_id: str,
2063
+ new_content: str = Form(...)
2064
+ ):
2065
+ """์ปจํ…์ŠคํŠธ ๋ฉ”์‹œ์ง€ ์ˆ˜์ •"""
2066
+ try:
2067
+ success = context_manager.edit_message(message_id, new_content)
2068
+ if success:
2069
+ return {
2070
+ "success": True,
2071
+ "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ˆ˜์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2072
+ "context_summary": context_manager.get_context_summary()
2073
+ }
2074
+ else:
2075
+ return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
2076
+ except Exception as e:
2077
+ logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ˆ˜์ • ์‹คํŒจ: {e}")
2078
+ return {"success": False, "error": str(e)}
2079
+
2080
+ @app.get("/context/search")
2081
+ async def search_context(query: str, max_results: int = 5):
2082
+ """์ปจํ…์ŠคํŠธ ๋‚ด์—์„œ ๊ฒ€์ƒ‰"""
2083
+ try:
2084
+ results = context_manager.search_context(query, max_results)
2085
+ return {
2086
+ "success": True,
2087
+ "query": query,
2088
+ "results": results,
2089
+ "total_results": len(results)
2090
+ }
2091
+ except Exception as e:
2092
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
2093
+ return {"success": False, "error": str(e)}
2094
+
2095
+ @app.post("/context/export")
2096
+ async def export_context(file_path: str = Form(None)):
2097
+ """์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ"""
2098
+ try:
2099
+ exported_path = context_manager.export_context(file_path)
2100
+ return {
2101
+ "success": True,
2102
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๋‚ด๋ณด๋‚ด์กŒ์Šต๋‹ˆ๋‹ค.",
2103
+ "file_path": exported_path
2104
+ }
2105
+ except Exception as e:
2106
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {e}")
2107
+ return {"success": False, "error": str(e)}
2108
+
2109
+ @app.post("/context/import")
2110
+ async def import_context(file_path: str = Form(...)):
2111
+ """์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ"""
2112
+ try:
2113
+ success = context_manager.import_context(file_path)
2114
+ if success:
2115
+ return {
2116
+ "success": True,
2117
+ "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๊ฐ€์ ธ์™€์กŒ์Šต๋‹ˆ๋‹ค.",
2118
+ "context_summary": context_manager.get_context_summary()
2119
+ }
2120
+ else:
2121
+ return {"success": False, "error": "์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2122
+ except Exception as e:
2123
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
2124
+ return {"success": False, "error": str(e)}
2125
+
2126
+ # ============================================================================
2127
+ # LoRA/QLoRA ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2128
+ # ============================================================================
2129
+
2130
+ @app.post("/lora/load-base-model")
2131
+ async def load_lora_base_model(
2132
+ model_path: str = Form(...),
2133
+ model_type: str = Form("causal_lm")
2134
+ ):
2135
+ """LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ"""
2136
+ if not LORA_AVAILABLE or lora_manager is None:
2137
+ return {
2138
+ "success": False,
2139
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2140
+ }
2141
+
2142
+ try:
2143
+ success = lora_manager.load_base_model(model_path, model_type)
2144
+ if success:
2145
+ return {
2146
+ "success": True,
2147
+ "message": "๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2148
+ "model_path": model_path,
2149
+ "device": lora_manager.device
2150
+ }
2151
+ else:
2152
+ return {"success": False, "error": "๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2153
+ except Exception as e:
2154
+ logger.error(f"โŒ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
2155
+ return {"success": False, "error": str(e)}
2156
+
2157
+ @app.post("/lora/create-config")
2158
+ async def create_lora_config(
2159
+ r: int = Form(16),
2160
+ lora_alpha: int = Form(32),
2161
+ target_modules: str = Form("q_proj,v_proj,k_proj,o_proj,gate_proj,up_proj,down_proj"),
2162
+ lora_dropout: float = Form(0.1),
2163
+ bias: str = Form("none"),
2164
+ task_type: str = Form("CAUSAL_LM")
2165
+ ):
2166
+ """LoRA ์„ค์ • ์ƒ์„ฑ"""
2167
+ if not LORA_AVAILABLE or lora_manager is None:
2168
+ return {
2169
+ "success": False,
2170
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2171
+ }
2172
+
2173
+ try:
2174
+ # target_modules๋ฅผ ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜
2175
+ target_modules_list = target_modules.split(",") if target_modules else None
2176
+
2177
+ config = lora_manager.create_lora_config(
2178
+ r=r,
2179
+ lora_alpha=lora_alpha,
2180
+ target_modules=target_modules_list,
2181
+ lora_dropout=lora_dropout,
2182
+ bias=bias,
2183
+ task_type=task_type
2184
+ )
2185
+
2186
+ return {
2187
+ "success": True,
2188
+ "message": "LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2189
+ "config": config.to_dict()
2190
+ }
2191
+ except Exception as e:
2192
+ logger.error(f"โŒ LoRA ์„ค์ • ์ƒ์„ฑ ์‹คํŒจ: {e}")
2193
+ return {"success": False, "error": str(e)}
2194
+
2195
+ @app.post("/lora/apply")
2196
+ async def apply_lora_adapter(adapter_name: str = Form("default")):
2197
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋ชจ๋ธ์— ์ ์šฉ"""
2198
+ if not LORA_AVAILABLE or lora_manager is None:
2199
+ return {
2200
+ "success": False,
2201
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2202
+ }
2203
+
2204
+ try:
2205
+ success = lora_manager.apply_lora_to_model(adapter_name)
2206
+ if success:
2207
+ return {
2208
+ "success": True,
2209
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2210
+ "adapter_name": adapter_name,
2211
+ "stats": lora_manager.get_adapter_stats()
2212
+ }
2213
+ else:
2214
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2215
+ except Exception as e:
2216
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
2217
+ return {"success": False, "error": str(e)}
2218
+
2219
+ @app.post("/lora/load-adapter")
2220
+ async def load_lora_adapter(
2221
+ adapter_path: str = Form(...),
2222
+ adapter_name: str = Form(None)
2223
+ ):
2224
+ """์ €์žฅ๋œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ"""
2225
+ if not LORA_AVAILABLE or lora_manager is None:
2226
+ return {
2227
+ "success": False,
2228
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2229
+ }
2230
+
2231
+ try:
2232
+ success = lora_manager.load_lora_adapter(adapter_path, adapter_name)
2233
+ if success:
2234
+ return {
2235
+ "success": True,
2236
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2237
+ "adapter_name": lora_manager.current_adapter_name,
2238
+ "stats": lora_manager.get_adapter_stats()
2239
+ }
2240
+ else:
2241
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2242
+ except Exception as e:
2243
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ ์‹คํŒจ: {e}")
2244
+ return {"success": False, "error": str(e)}
2245
+
2246
+ @app.post("/lora/save-adapter")
2247
+ async def save_lora_adapter(
2248
+ adapter_name: str = Form(None),
2249
+ output_dir: str = Form(None)
2250
+ ):
2251
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ"""
2252
+ if not LORA_AVAILABLE or lora_manager is None:
2253
+ return {
2254
+ "success": False,
2255
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2256
+ }
2257
+
2258
+ try:
2259
+ success = lora_manager.save_lora_adapter(adapter_name, output_dir)
2260
+ if success:
2261
+ return {
2262
+ "success": True,
2263
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2264
+ "adapter_name": lora_manager.current_adapter_name
2265
+ }
2266
+ else:
2267
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2268
+ except Exception as e:
2269
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
2270
+ return {"success": False, "error": str(e)}
2271
+
2272
+ @app.get("/lora/adapters")
2273
+ async def list_lora_adapters():
2274
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก"""
2275
+ if not LORA_AVAILABLE or lora_manager is None:
2276
+ return {
2277
+ "success": False,
2278
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2279
+ }
2280
+
2281
+ try:
2282
+ adapters = lora_manager.list_available_adapters()
2283
+ return {
2284
+ "success": True,
2285
+ "adapters": adapters
2286
+ }
2287
+ except Exception as e:
2288
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก ์กฐํšŒ ์‹คํŒจ: {e}")
2289
+ return {"success": False, "error": str(e)}
2290
+
2291
+ @app.get("/lora/stats")
2292
+ async def get_lora_stats():
2293
+ """ํ˜„์žฌ LoRA ์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„"""
2294
+ if not LORA_AVAILABLE or lora_manager is None:
2295
+ return {
2296
+ "success": False,
2297
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2298
+ }
2299
+
2300
+ try:
2301
+ stats = lora_manager.get_adapter_stats()
2302
+ return {
2303
+ "success": True,
2304
+ "stats": stats
2305
+ }
2306
+ except Exception as e:
2307
+ logger.error(f"โŒ LoRA ํ†ต๊ณ„ ์กฐํšŒ ์‹คํŒจ: {e}")
2308
+ return {"success": False, "error": str(e)}
2309
+
2310
+ @app.post("/lora/switch")
2311
+ async def switch_lora_adapter(adapter_name: str = Form(...)):
2312
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜"""
2313
+ if not LORA_AVAILABLE or lora_manager is None:
2314
+ return {
2315
+ "success": False,
2316
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2317
+ }
2318
+
2319
+ try:
2320
+ success = lora_manager.switch_adapter(adapter_name)
2321
+ if success:
2322
+ return {
2323
+ "success": True,
2324
+ "message": f"LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ {adapter_name}์œผ๋กœ ์ „ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2325
+ "adapter_name": adapter_name,
2326
+ "stats": lora_manager.get_adapter_stats()
2327
+ }
2328
+ else:
2329
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2330
+ except Exception as e:
2331
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
2332
+ return {"success": False, "error": str(e)}
2333
+
2334
+ @app.post("/lora/unload")
2335
+ async def unload_lora_adapter():
2336
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ"""
2337
+ if not LORA_AVAILABLE or lora_manager is None:
2338
+ return {
2339
+ "success": False,
2340
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2341
+ }
2342
+
2343
+ try:
2344
+ success = lora_manager.unload_adapter()
2345
+ if success:
2346
+ return {
2347
+ "success": True,
2348
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์–ธ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
2349
+ }
2350
+ else:
2351
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2352
+ except Exception as e:
2353
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
2354
+ return {"success": False, "error": str(e)}
2355
+
2356
+ @app.post("/lora/generate")
2357
+ async def generate_with_lora(
2358
+ prompt: str = Form(...),
2359
+ max_length: int = Form(100),
2360
+ temperature: float = Form(0.7)
2361
+ ):
2362
+ """LoRA ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ"""
2363
+ if not LORA_AVAILABLE or lora_manager is None:
2364
+ return {
2365
+ "success": False,
2366
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2367
+ }
2368
+
2369
+ try:
2370
+ response = lora_manager.generate_text(prompt, max_length, temperature)
2371
+ return {
2372
+ "success": True,
2373
+ "response": response,
2374
+ "adapter_name": lora_manager.current_adapter_name
2375
+ }
2376
+ except Exception as e:
2377
+ logger.error(f"โŒ LoRA ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
2378
+ return {"success": False, "error": str(e)}
2379
+
2380
+ @app.post("/lora/merge")
2381
+ async def merge_lora_with_base(output_path: str = Form(None)):
2382
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ"""
2383
+ if not LORA_AVAILABLE or lora_manager is None:
2384
+ return {
2385
+ "success": False,
2386
+ "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
2387
+ }
2388
+
2389
+ try:
2390
+ success = lora_manager.merge_lora_with_base(output_path)
2391
+ if success:
2392
+ return {
2393
+ "success": True,
2394
+ "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
2395
+ "output_path": output_path or f"{lora_manager.base_model_path}_merged"
2396
+ }
2397
+ else:
2398
+ return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
2399
+ except Exception as e:
2400
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
2401
+ return {"success": False, "error": str(e)}
2402
+
2403
  # ============================================================================
2404
  # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
2405
  # ============================================================================
 
2462
  use_text: bool = Form(True),
2463
  use_image: bool = Form(True),
2464
  use_latex: bool = Form(True),
2465
+ use_latex_ocr: bool = Form(False), # LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ
2466
  max_length: Optional[int] = Form(None),
2467
  temperature: Optional[float] = Form(None),
2468
  top_p: Optional[float] = Form(None),
 
2514
  "text_rag_available": True,
2515
  "image_rag_available": True,
2516
  "latex_rag_available": True,
2517
+ "latex_ocr_faiss_available": False, # LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ
2518
  "status": "ready"
2519
  }
2520
  except Exception as e:
lily_llm_api/app_v2_250819_1305.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/app_v2_250819_1828.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/app_v2_250819_2008.py ADDED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/models/{configuration.py โ†’ back/configuration.py} RENAMED
File without changes
lily_llm_api/models/{modeling.py โ†’ back/modeling.py} RENAMED
File without changes
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -9,6 +9,8 @@ import logging
9
  from transformers import AutoTokenizer
10
  import os
11
  from dotenv import load_dotenv
 
 
12
  load_dotenv()
13
 
14
  HF_TOKEN = os.getenv("HF_TOKEN")
@@ -95,113 +97,66 @@ class Kanana15V3bInstructProfile:
95
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
96
 
97
  def load_model(self) -> Tuple[Any, Any]:
98
- """ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
99
- logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
100
 
101
- import os
102
  from pathlib import Path
 
 
 
 
103
 
104
- # ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋”ฉ
105
- self._load_environment_variables()
 
106
 
107
  try:
108
- # 1. ๋กœ์ปฌ ์บ์‹œ ๊ฒฝ๋กœ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
109
- use_local = False
110
- if self.local_path is not None:
111
- local_model_path = Path(self.local_path)
112
- use_local = local_model_path.exists() and any(local_model_path.iterdir())
113
-
114
- if use_local:
115
- logger.info(f"๐Ÿ—‚๏ธ ๋กœ์ปฌ ๋ชจ๋ธ ์‚ฌ์šฉ: {self.local_path}")
116
- model_path = self.local_path
117
- local_files_only = True
118
-
119
- # ๋กœ์ปฌ ๋ชจ๋ธ์˜ ๊ฒฝ์šฐ sys.path์— ์ถ”๊ฐ€
120
- if self.local_path not in sys.path:
121
- sys.path.insert(0, self.local_path)
122
- else:
123
- logger.info(f"๐ŸŒ Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ: {self.model_name}")
124
- model_path = self.model_name
125
- local_files_only = False
126
-
127
- # ํ™˜๊ฒฝ๋ณ„ ์ถ”๊ฐ€ ์„ค์ •
128
- if self.is_local:
129
- logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
130
- # ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ๋Š” ์ถ”๊ฐ€ ์„ค์ •์ด ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Œ
131
- else:
132
- logger.info("โ˜๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
133
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ๋Š” ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ๋“ฑ ์„ค์ •
134
 
135
- # DEBUG: ๋ชจ๋ธ ๊ฒฝ๋กœ์™€ ์„ค์ • ์ถœ๋ ฅ
136
- logger.info(f"๐Ÿ” DEBUG: model_path = {model_path}")
137
- logger.info(f"๐Ÿ” DEBUG: local_files_only = {local_files_only}")
138
- logger.info(f"๐Ÿ” DEBUG: HF_TOKEN = {'์žˆ์Œ' if HF_TOKEN else '์—†์Œ'}")
139
- logger.info(f"๐Ÿ” DEBUG: use_local = {use_local}")
140
- logger.info(f"๐Ÿ” DEBUG: is_local = {self.is_local}")
141
-
142
- # 2. ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
143
- logger.info(f"๐Ÿ” DEBUG: ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์‹œ์ž‘ - model_path={model_path}")
144
- tokenizer = AutoTokenizer.from_pretrained(
145
  model_path,
146
- token=HF_TOKEN,
147
  trust_remote_code=True,
148
- local_files_only=local_files_only,
149
- cache_dir="/app/cache/transformers" if not use_local else None
150
  )
151
- logger.info(f"โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์™„๋ฃŒ ({tokenizer.__class__.__name__})")
152
-
153
- # 3. ๋ชจ๋ธ ๋กœ๋“œ
154
- logger.info(f"๐Ÿ” DEBUG: ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘ - use_local={use_local}")
155
- from modeling import KananaVForConditionalGeneration
156
-
157
- if use_local:
158
- # ๋กœ์ปฌ ๋ชจ๋ธ: ์ปค์Šคํ…€ ๋ชจ๋ธ๋ง ํด๋ž˜์Šค ์‚ฌ์šฉ
159
- logger.info("๐Ÿ” DEBUG: ๋กœ์ปฌ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
160
- selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
161
- logger.info(f"๐Ÿ” DEBUG: selected_dtype = {selected_dtype}")
162
- model = KananaVForConditionalGeneration.from_pretrained(
163
- model_path,
164
- token=HF_TOKEN,
165
- trust_remote_code=True,
166
- torch_dtype=selected_dtype,
167
- local_files_only=True,
168
- # low_cpu_mem_usage=True,
169
- ).to(DEVICE)
170
- else:
171
- # ์„œ๋ฒ„ ํ™˜๊ฒฝ: KananaVForConditionalGeneration ์ง์ ‘ ์‚ฌ์šฉ
172
- logger.info("๐Ÿ” DEBUG: ์„œ๋ฒ„ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
173
- logger.info("๏ฟฝ๏ฟฝ๏ฟฝ๏ฟฝ DEBUG: modeling ๋ชจ๋“ˆ import ์‹œ๋„")
174
- try:
175
- logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์„ฑ๊ณต")
176
- except ImportError as e:
177
- logger.error(f"๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹คํŒจ - {e}")
178
- raise
179
-
180
- logger.info("๐Ÿ” DEBUG: KananaVForConditionalGeneration.from_pretrained ํ˜ธ์ถœ")
181
- # CPU ํ™˜๊ฒฝ์—์„œ float16/bfloat16๋ณด๋‹ค float32๊ฐ€ ๋” ์•ˆ์ •์ ์ธ ๊ฒฝ์šฐ๊ฐ€ ๋งŽ์Œ
182
- selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
183
- model = KananaVForConditionalGeneration.from_pretrained(
184
- model_path,
185
- token=HF_TOKEN,
186
- torch_dtype=selected_dtype,
187
- trust_remote_code=True,
188
- cache_dir="/app/cache/transformers",
189
- # device_map="auto",
190
- # low_cpu_mem_usage=True,
191
- ).to(DEVICE)
192
 
193
- logger.info(f"โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ ({model.__class__.__name__})")
194
- return model, tokenizer
 
 
195
 
196
  except Exception as e:
197
- logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}", exc_info=True)
198
- if use_local and self.local_path in sys.path:
199
- sys.path.remove(self.local_path)
200
- raise
 
 
 
 
201
 
202
  def get_generation_config(self) -> Dict[str, Any]:
203
  # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
204
- return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
205
 
206
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
207
  """
 
9
  from transformers import AutoTokenizer
10
  import os
11
  from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
  load_dotenv()
15
 
16
  HF_TOKEN = os.getenv("HF_TOKEN")
 
97
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
 
99
  def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
 
103
+ import sys
104
  from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
 
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
 
114
  try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
 
 
 
124
  model_path,
 
125
  trust_remote_code=True,
126
+ local_files_only=use_local
 
127
  )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.float16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'float16')):
133
+ selected_dtype = torch.float16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
 
147
  except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
 
157
  def get_generation_config(self) -> Dict[str, Any]:
158
  # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 50, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
 
161
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
  """
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_0915.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+
14
+ HF_TOKEN = os.getenv("HF_TOKEN")
15
+
16
+ logger = logging.getLogger(__name__)
17
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
+
19
+ class Kanana15V3bInstructProfile:
20
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
21
+
22
+ def __init__(self):
23
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
24
+ self.is_local = self._detect_local_environment()
25
+
26
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
27
+ if self.is_local:
28
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
29
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
30
+ self.display_name = "kanana-1.5-v-3b-instruct"
31
+ else:
32
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
33
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
34
+ self.display_name = "kanana-1.5-v-3b-instruct"
35
+
36
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
37
+ self.language = "ko"
38
+ self.model_size = "3.6B"
39
+ self.multimodal = True
40
+
41
+ def _detect_local_environment(self) -> bool:
42
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
43
+ import os
44
+
45
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
46
+ local_indicators = [
47
+ os.path.exists('.env'),
48
+ os.path.exists('../.env'),
49
+ os.path.exists('../../.env'),
50
+ os.getenv('IS_LOCAL') == 'true',
51
+ os.getenv('ENVIRONMENT') == 'local',
52
+ os.getenv('DOCKER_ENV') == 'local',
53
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
54
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
55
+ ]
56
+
57
+ is_local = any(local_indicators)
58
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
59
+ return is_local
60
+
61
+ def _load_environment_variables(self):
62
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
63
+ import os
64
+
65
+ try:
66
+ if self.is_local:
67
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
68
+ from dotenv import load_dotenv
69
+
70
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
71
+ env_paths = [
72
+ '.env',
73
+ '../.env',
74
+ '../../.env',
75
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
76
+ ]
77
+
78
+ env_loaded = False
79
+ for env_path in env_paths:
80
+ if os.path.exists(env_path):
81
+ load_dotenv(env_path)
82
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
83
+ env_loaded = True
84
+ break
85
+
86
+ if not env_loaded:
87
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
88
+ else:
89
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
90
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
91
+
92
+ except ImportError:
93
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
94
+ except Exception as e:
95
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
96
+
97
+ def load_model(self) -> Tuple[Any, Any]:
98
+ """ํ™˜๊ฒฝ์— ๋”ฐ๋ผ ๋ชจ๋ธ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
99
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
100
+
101
+ import os
102
+ from pathlib import Path
103
+
104
+ # ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋”ฉ
105
+ self._load_environment_variables()
106
+
107
+ try:
108
+ # 1. ๋กœ์ปฌ ์บ์‹œ ๊ฒฝ๋กœ๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธ
109
+ use_local = False
110
+ if self.local_path is not None:
111
+ local_model_path = Path(self.local_path)
112
+ use_local = local_model_path.exists() and any(local_model_path.iterdir())
113
+
114
+ if use_local:
115
+ logger.info(f"๐Ÿ—‚๏ธ ๋กœ์ปฌ ๋ชจ๋ธ ์‚ฌ์šฉ: {self.local_path}")
116
+ model_path = self.local_path
117
+ local_files_only = True
118
+
119
+ # ๋กœ์ปฌ ๋ชจ๋ธ์˜ ๊ฒฝ์šฐ sys.path์— ์ถ”๊ฐ€
120
+ if self.local_path not in sys.path:
121
+ sys.path.insert(0, self.local_path)
122
+ else:
123
+ logger.info(f"๐ŸŒ Hugging Face Hub์—์„œ ๋‹ค์šด๋กœ๋“œ: {self.model_name}")
124
+ model_path = self.model_name
125
+ local_files_only = False
126
+
127
+ # ํ™˜๊ฒฝ๋ณ„ ์ถ”๊ฐ€ ์„ค์ •
128
+ if self.is_local:
129
+ logger.info("๐Ÿ  ๋กœ์ปฌ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
130
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ์—์„œ๋Š” ์ถ”๊ฐ€ ์„ค์ •์ด ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Œ
131
+ else:
132
+ logger.info("๏ฟฝ๏ฟฝ๏ฟฝ๏ธ ์„œ๋ฒ„ ํ™˜๊ฒฝ ์„ค์ • ์ ์šฉ")
133
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ์—์„œ๋Š” ์บ์‹œ ๋””๋ ‰ํ† ๋ฆฌ ๋“ฑ ์„ค์ •
134
+
135
+ # DEBUG: ๋ชจ๋ธ ๊ฒฝ๋กœ์™€ ์„ค์ • ์ถœ๋ ฅ
136
+ logger.info(f"๐Ÿ” DEBUG: model_path = {model_path}")
137
+ logger.info(f"๐Ÿ” DEBUG: local_files_only = {local_files_only}")
138
+ logger.info(f"๐Ÿ” DEBUG: HF_TOKEN = {'์žˆ์Œ' if HF_TOKEN else '์—†์Œ'}")
139
+ logger.info(f"๐Ÿ” DEBUG: use_local = {use_local}")
140
+ logger.info(f"๐Ÿ” DEBUG: is_local = {self.is_local}")
141
+
142
+ # 2. ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
143
+ logger.info(f"๐Ÿ” DEBUG: ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์‹œ์ž‘ - model_path={model_path}")
144
+ tokenizer = AutoTokenizer.from_pretrained(
145
+ model_path,
146
+ token=HF_TOKEN,
147
+ trust_remote_code=True,
148
+ local_files_only=local_files_only,
149
+ cache_dir="/app/cache/transformers" if not use_local else None
150
+ )
151
+ logger.info(f"โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์™„๋ฃŒ ({tokenizer.__class__.__name__})")
152
+
153
+ # 3. ๋ชจ๋ธ ๋กœ๋“œ
154
+ logger.info(f"๐Ÿ” DEBUG: ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘ - use_local={use_local}")
155
+ from modeling import KananaVForConditionalGeneration
156
+
157
+ if use_local:
158
+ # ๋กœ์ปฌ ๋ชจ๋ธ: ์ปค์Šคํ…€ ๋ชจ๋ธ๋ง ํด๋ž˜์Šค ์‚ฌ์šฉ
159
+ logger.info("๐Ÿ” DEBUG: ๋กœ์ปฌ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
160
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
161
+ logger.info(f"๐Ÿ” DEBUG: selected_dtype = {selected_dtype}")
162
+ model = KananaVForConditionalGeneration.from_pretrained(
163
+ model_path,
164
+ token=HF_TOKEN,
165
+ trust_remote_code=True,
166
+ torch_dtype=selected_dtype,
167
+ local_files_only=True,
168
+ # low_cpu_mem_usage=True,
169
+ ).to(DEVICE)
170
+ else:
171
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: KananaVForConditionalGeneration ์ง์ ‘ ์‚ฌ์šฉ
172
+ logger.info("๐Ÿ” DEBUG: ์„œ๋ฒ„ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ๋„")
173
+ logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹œ๋„")
174
+ try:
175
+ logger.info("๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์„ฑ๊ณต")
176
+ except ImportError as e:
177
+ logger.error(f"๐Ÿ” DEBUG: modeling ๋ชจ๋“ˆ import ์‹คํŒจ - {e}")
178
+ raise
179
+
180
+ logger.info("๐Ÿ” DEBUG: KananaVForConditionalGeneration.from_pretrained ํ˜ธ์ถœ")
181
+ # CPU ํ™˜๊ฒฝ์—์„œ float16/bfloat16๋ณด๋‹ค float32๊ฐ€ ๋” ์•ˆ์ •์ ์ธ ๊ฒฝ์šฐ๊ฐ€ ๋งŽ์Œ
182
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float16
183
+ model = KananaVForConditionalGeneration.from_pretrained(
184
+ model_path,
185
+ token=HF_TOKEN,
186
+ torch_dtype=selected_dtype,
187
+ trust_remote_code=True,
188
+ cache_dir="/app/cache/transformers",
189
+ # device_map="auto",
190
+ # low_cpu_mem_usage=True,
191
+ ).to(DEVICE)
192
+
193
+ logger.info(f"โœ… ๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ ({model.__class__.__name__})")
194
+ return model, tokenizer
195
+
196
+ except Exception as e:
197
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}", exc_info=True)
198
+ if use_local and self.local_path in sys.path:
199
+ sys.path.remove(self.local_path)
200
+ raise
201
+
202
+ def get_generation_config(self) -> Dict[str, Any]:
203
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
204
+ return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
205
+
206
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
207
+ """
208
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
209
+ """
210
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
211
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
212
+
213
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
214
+ if formatted_prompt and formatted_prompt in full_text:
215
+ response = full_text.replace(formatted_prompt, "").strip()
216
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
217
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
218
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
219
+ return response
220
+
221
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
222
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
223
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
224
+ assistant_tags = [
225
+ "<|start_header_id|>assistant<|end_header_id|>",
226
+ "<|im_start|>assistant",
227
+ "assistant\n",
228
+ "assistant:"
229
+ ]
230
+ for tag in assistant_tags:
231
+ if tag in full_text:
232
+ parts = full_text.split(tag)
233
+ if len(parts) > 1:
234
+ response = parts[-1].strip()
235
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
236
+ response = response.replace("<|im_end|>", "").strip()
237
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
238
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
239
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
240
+ return response
241
+
242
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
243
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
244
+ if "assistant" in full_text:
245
+ parts = full_text.split("assistant")
246
+ if len(parts) > 1:
247
+ response = parts[-1].strip()
248
+ response = response.replace("<|im_end|>", "").strip()
249
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
250
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
251
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
252
+ return response
253
+
254
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
255
+ clean_text = full_text.strip()
256
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
257
+ patterns_to_remove = [
258
+ "<|im_start|>user\n",
259
+ "<|im_end|>",
260
+ "<image>",
261
+ "user\n",
262
+ "assistant\n"
263
+ ]
264
+
265
+ for pattern in patterns_to_remove:
266
+ clean_text = clean_text.replace(pattern, "")
267
+
268
+ clean_text = clean_text.strip()
269
+
270
+ if clean_text and clean_text != full_text:
271
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
272
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
273
+ return clean_text
274
+
275
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
276
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
277
+ return full_text
278
+
279
+ def get_model_info(self) -> Dict[str, Any]:
280
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_1134py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.bfloat16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'bfloat16')):
133
+ selected_dtype = torch.float32
134
+ logger.warning("CPU์—์„œ bfloat16์„ ์ง€์›ํ•˜์ง€ ์•Š๊ฑฐ๋‚˜ MKL์ด ์—†์–ด float32๋กœ ๋ณ€๊ฒฝํ•ฉ๋‹ˆ๋‹ค.")
135
+
136
+ model = AutoModelForVision2Seq.from_pretrained(
137
+ model_path,
138
+ trust_remote_code=True,
139
+ torch_dtype=selected_dtype,
140
+ local_files_only=use_local,
141
+ ).to(device)
142
+
143
+ model.eval()
144
+
145
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
146
+ return model, processor
147
+
148
+ except Exception as e:
149
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
150
+ raise
151
+
152
+ finally:
153
+ if use_local and model_path in sys.path:
154
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
155
+ sys.path.remove(model_path)
156
+
157
+
158
+ def get_generation_config(self) -> Dict[str, Any]:
159
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
160
+ return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
161
+
162
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
163
+ """
164
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
165
+ """
166
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
167
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
168
+
169
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
170
+ if formatted_prompt and formatted_prompt in full_text:
171
+ response = full_text.replace(formatted_prompt, "").strip()
172
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
173
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
174
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
175
+ return response
176
+
177
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
178
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
180
+ assistant_tags = [
181
+ "<|start_header_id|>assistant<|end_header_id|>",
182
+ "<|im_start|>assistant",
183
+ "assistant\n",
184
+ "assistant:"
185
+ ]
186
+ for tag in assistant_tags:
187
+ if tag in full_text:
188
+ parts = full_text.split(tag)
189
+ if len(parts) > 1:
190
+ response = parts[-1].strip()
191
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
192
+ response = response.replace("<|im_end|>", "").strip()
193
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
194
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
195
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
196
+ return response
197
+
198
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
199
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
200
+ if "assistant" in full_text:
201
+ parts = full_text.split("assistant")
202
+ if len(parts) > 1:
203
+ response = parts[-1].strip()
204
+ response = response.replace("<|im_end|>", "").strip()
205
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
206
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
207
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
208
+ return response
209
+
210
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
211
+ clean_text = full_text.strip()
212
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
213
+ patterns_to_remove = [
214
+ "<|im_start|>user\n",
215
+ "<|im_end|>",
216
+ "<image>",
217
+ "user\n",
218
+ "assistant\n"
219
+ ]
220
+
221
+ for pattern in patterns_to_remove:
222
+ clean_text = clean_text.replace(pattern, "")
223
+
224
+ clean_text = clean_text.strip()
225
+
226
+ if clean_text and clean_text != full_text:
227
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
228
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
229
+ return clean_text
230
+
231
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
232
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
233
+ return full_text
234
+
235
+ def get_model_info(self) -> Dict[str, Any]:
236
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_1304.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.bfloat16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'bfloat16')):
133
+ selected_dtype = torch.bfloat16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
141
+
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
+
147
+ except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
+
157
+ def get_generation_config(self) -> Dict[str, Any]:
158
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 20, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
+
161
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
+ """
163
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
164
+ """
165
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
166
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
167
+
168
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
169
+ if formatted_prompt and formatted_prompt in full_text:
170
+ response = full_text.replace(formatted_prompt, "").strip()
171
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
172
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
173
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
174
+ return response
175
+
176
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
177
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
178
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ assistant_tags = [
180
+ "<|start_header_id|>assistant<|end_header_id|>",
181
+ "<|im_start|>assistant",
182
+ "assistant\n",
183
+ "assistant:"
184
+ ]
185
+ for tag in assistant_tags:
186
+ if tag in full_text:
187
+ parts = full_text.split(tag)
188
+ if len(parts) > 1:
189
+ response = parts[-1].strip()
190
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
191
+ response = response.replace("<|im_end|>", "").strip()
192
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
193
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
194
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
195
+ return response
196
+
197
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
198
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
199
+ if "assistant" in full_text:
200
+ parts = full_text.split("assistant")
201
+ if len(parts) > 1:
202
+ response = parts[-1].strip()
203
+ response = response.replace("<|im_end|>", "").strip()
204
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
205
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
206
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
207
+ return response
208
+
209
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
210
+ clean_text = full_text.strip()
211
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
212
+ patterns_to_remove = [
213
+ "<|im_start|>user\n",
214
+ "<|im_end|>",
215
+ "<image>",
216
+ "user\n",
217
+ "assistant\n"
218
+ ]
219
+
220
+ for pattern in patterns_to_remove:
221
+ clean_text = clean_text.replace(pattern, "")
222
+
223
+ clean_text = clean_text.strip()
224
+
225
+ if clean_text and clean_text != full_text:
226
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
227
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
228
+ return clean_text
229
+
230
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
231
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
232
+ return full_text
233
+
234
+ def get_model_info(self) -> Dict[str, Any]:
235
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_api/models/kanana_1_5_v_3b_instruct_250819_2008.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„ (๋‹จ์ˆœ ๋กœ๋”ฉ ์ตœ์ข…๋ณธ)
4
+ """
5
+ import sys
6
+ from typing import Dict, Any, Tuple
7
+ import torch
8
+ import logging
9
+ from transformers import AutoTokenizer
10
+ import os
11
+ from dotenv import load_dotenv
12
+ from pathlib import Path
13
+
14
+ load_dotenv()
15
+
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ logger = logging.getLogger(__name__)
19
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
20
+
21
+ class Kanana15V3bInstructProfile:
22
+ """Kanana-1.5-v-3b-instruct ๋ชจ๋ธ ํ”„๋กœํ•„"""
23
+
24
+ def __init__(self):
25
+ # ํ™˜๊ฒฝ ๊ฐ์ง€
26
+ self.is_local = self._detect_local_environment()
27
+
28
+ # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
29
+ if self.is_local:
30
+ self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
+ self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
+ self.display_name = "kanana-1.5-v-3b-instruct"
33
+ else:
34
+ self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
+ self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
+ self.display_name = "kanana-1.5-v-3b-instruct"
37
+
38
+ self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
+ self.language = "ko"
40
+ self.model_size = "3.6B"
41
+ self.multimodal = True
42
+
43
+ def _detect_local_environment(self) -> bool:
44
+ """๋กœ์ปฌ ํ™˜๊ฒฝ์ธ์ง€ ๊ฐ์ง€"""
45
+ import os
46
+
47
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฐ์ง€ ์กฐ๊ฑด๋“ค
48
+ local_indicators = [
49
+ os.path.exists('.env'),
50
+ os.path.exists('../.env'),
51
+ os.path.exists('../../.env'),
52
+ os.getenv('IS_LOCAL') == 'true',
53
+ os.getenv('ENVIRONMENT') == 'local',
54
+ os.getenv('DOCKER_ENV') == 'local',
55
+ # Windows ๊ฒฝ๋กœ ํ™•์ธ
56
+ os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
+ ]
58
+
59
+ is_local = any(local_indicators)
60
+ logger.info(f"๐Ÿ” ํ™˜๊ฒฝ ๊ฐ์ง€: {'๋กœ์ปฌ' if is_local else '์„œ๋ฒ„'}")
61
+ return is_local
62
+
63
+ def _load_environment_variables(self):
64
+ """ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค."""
65
+ import os
66
+
67
+ try:
68
+ if self.is_local:
69
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: .env ํŒŒ์ผ ๋กœ๋“œ
70
+ from dotenv import load_dotenv
71
+
72
+ # ์—ฌ๋Ÿฌ ๊ฒฝ๋กœ์—์„œ .env ํŒŒ์ผ ์ฐพ๊ธฐ
73
+ env_paths = [
74
+ '.env',
75
+ '../.env',
76
+ '../../.env',
77
+ 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
+ ]
79
+
80
+ env_loaded = False
81
+ for env_path in env_paths:
82
+ if os.path.exists(env_path):
83
+ load_dotenv(env_path)
84
+ logger.info(f"โœ… ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ๋จ: {env_path}")
85
+ env_loaded = True
86
+ break
87
+
88
+ if not env_loaded:
89
+ logger.warning("โš ๏ธ .env ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")
90
+ else:
91
+ # ์„œ๋ฒ„ ํ™˜๊ฒฝ: ์‹œ์Šคํ…œ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ
92
+ logger.info("๐ŸŒ ์„œ๋ฒ„ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
93
+
94
+ except ImportError:
95
+ logger.warning("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ")
96
+ except Exception as e:
97
+ logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
+
99
+ def load_model(self) -> Tuple[Any, Any]:
100
+ """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
+
103
+ import sys
104
+ from pathlib import Path
105
+ import os # <<< os ๋ชจ๋“ˆ ์ถ”๊ฐ€
106
+
107
+ # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
108
+ absolute_model_path = os.path.abspath(self.local_path)
109
+
110
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
111
+ # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
112
+ model_path = absolute_model_path if use_local else self.model_name
113
+
114
+ try:
115
+ from transformers import AutoModelForVision2Seq, AutoProcessor
116
+
117
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
118
+
119
+ if use_local and model_path not in sys.path:
120
+ logger.info(f" -> sys.path์— ์ž„์‹œ ๊ฒฝ๋กœ ์ถ”๊ฐ€: {model_path}")
121
+ sys.path.insert(0, model_path)
122
+
123
+ processor = AutoProcessor.from_pretrained(
124
+ model_path,
125
+ trust_remote_code=True,
126
+ local_files_only=use_local
127
+ )
128
+
129
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
130
+ selected_dtype = torch.float16
131
+ # CPU bfloat16 ์ง€์› ์—ฌ๋ถ€ ์ฒดํฌ ๋กœ์ง ์ˆ˜์ •
132
+ if device == 'cpu' and not (hasattr(torch.backends, 'mkl') and torch.backends.mkl.is_available() and hasattr(torch, 'float16')):
133
+ selected_dtype = torch.float16
134
+
135
+ model = AutoModelForVision2Seq.from_pretrained(
136
+ model_path,
137
+ trust_remote_code=True,
138
+ torch_dtype=selected_dtype,
139
+ local_files_only=use_local,
140
+ ).to(device)
141
+
142
+ model.eval()
143
+
144
+ logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
145
+ return model, processor
146
+
147
+ except Exception as e:
148
+ logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
149
+ raise
150
+
151
+ finally:
152
+ if use_local and model_path in sys.path:
153
+ logger.info(f" -> sys.path์—์„œ ์ž„์‹œ ๊ฒฝ๋กœ ์ œ๊ฑฐ: {model_path}")
154
+ sys.path.remove(model_path)
155
+
156
+
157
+ def get_generation_config(self) -> Dict[str, Any]:
158
+ # ๋ชจ๋ธ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™” ์„ค์ •, max_new_tokens : ์ƒ์„ฑ๋˜๋Š” ํ…์ŠคํŠธ ๊ธธ์ด ์ตœ๋Œ€๊ฐ’ (์ด๋ฏธ์ง€ ์„ค๋ช…์„ ์œ„ํ•ด ์ฆ๊ฐ€)
159
+ return {"max_new_tokens": 50, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
160
+
161
+ def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
162
+ """
163
+ ๋‹ค์–‘ํ•œ ์‘๋‹ต ํ˜•์‹์„ ์ฒ˜๋ฆฌํ•  ์ˆ˜ ์žˆ๋Š” ๋” ๋˜‘๋˜‘ํ•œ ์‘๋‹ต ์ถ”์ถœ ํ•จ์ˆ˜
164
+ """
165
+ logger.info(f"--- ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
166
+ logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
167
+
168
+ # ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์ œ๊ณต๋œ ๊ฒฝ์šฐ ์ด๋ฅผ ์ œ๊ฑฐ
169
+ if formatted_prompt and formatted_prompt in full_text:
170
+ response = full_text.replace(formatted_prompt, "").strip()
171
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
172
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
173
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
174
+ return response
175
+
176
+ # 1์ˆœ์œ„: ๊ฐ€์žฅ ์ •ํ™•ํ•œ ํŠน์ˆ˜ ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
177
+ # ์˜ˆ: <|start_header_id|>assistant<|end_header_id|>์•ˆ๋…•ํ•˜์„ธ์š”...
178
+ # ๋˜๋Š” <|im_start|>assistant์•ˆ๋…•ํ•˜์„ธ์š”...
179
+ assistant_tags = [
180
+ "<|start_header_id|>assistant<|end_header_id|>",
181
+ "<|im_start|>assistant",
182
+ "assistant\n",
183
+ "assistant:"
184
+ ]
185
+ for tag in assistant_tags:
186
+ if tag in full_text:
187
+ parts = full_text.split(tag)
188
+ if len(parts) > 1:
189
+ response = parts[-1].strip()
190
+ # ์ถ”๊ฐ€ ์ •๋ฆฌ: ํŠน์ˆ˜ ํ† ํฐ ์ œ๊ฑฐ
191
+ response = response.replace("<|im_end|>", "").strip()
192
+ logger.info(f"โœ… ์„ฑ๊ณต: '{tag}' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
193
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
194
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
195
+ return response
196
+
197
+ # 2์ˆœ์œ„: ๊ฐ„๋‹จํ•œ ํ‚ค์›Œ๋“œ๋กœ ์ถ”์ถœ ์‹œ๋„
198
+ # ์˜ˆ: ... user ์•ˆ๋…•ํ•˜์„ธ์š” assistant ์•ˆ๋…•ํ•˜์„ธ์š” ...
199
+ if "assistant" in full_text:
200
+ parts = full_text.split("assistant")
201
+ if len(parts) > 1:
202
+ response = parts[-1].strip()
203
+ response = response.replace("<|im_end|>", "").strip()
204
+ logger.info("โœ… ์„ฑ๊ณต: 'assistant' ํ‚ค์›Œ๋“œ๋กœ ์‘๋‹ต ์ถ”์ถœ")
205
+ logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
206
+ if response: # ๋นˆ ๋ฌธ์ž์—ด์ด ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ๋ฐ˜ํ™˜
207
+ return response
208
+
209
+ # 3์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ, ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„ ์ œ๊ฑฐ
210
+ clean_text = full_text.strip()
211
+ # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
212
+ patterns_to_remove = [
213
+ "<|im_start|>user\n",
214
+ "<|im_end|>",
215
+ "<image>",
216
+ "user\n",
217
+ "assistant\n"
218
+ ]
219
+
220
+ for pattern in patterns_to_remove:
221
+ clean_text = clean_text.replace(pattern, "")
222
+
223
+ clean_text = clean_text.strip()
224
+
225
+ if clean_text and clean_text != full_text:
226
+ logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
227
+ logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
228
+ return clean_text
229
+
230
+ logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ์‘๋‹ต์—์„œ assistant ๋ถ€๋ถ„์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
231
+ logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {full_text}")
232
+ return full_text
233
+
234
+ def get_model_info(self) -> Dict[str, Any]:
235
+ return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
lily_llm_core/context_manager.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž (Context Manager)
4
+ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ์™€ ๋‹จ๊ธฐ ๊ธฐ์–ต์„ ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
+ """
6
+
7
+ import logging
8
+ import time
9
+ from typing import List, Dict, Any, Optional, Tuple
10
+ from dataclasses import dataclass
11
+ from collections import deque
12
+ import json
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ @dataclass
17
+ class ConversationTurn:
18
+ """๋Œ€ํ™” ํ„ด์„ ๋‚˜ํƒ€๋‚ด๋Š” ๋ฐ์ดํ„ฐ ํด๋ž˜์Šค"""
19
+ role: str # 'user' ๋˜๋Š” 'assistant'
20
+ content: str
21
+ timestamp: float
22
+ message_id: str
23
+ metadata: Optional[Dict[str, Any]] = None
24
+
25
+ class ContextManager:
26
+ """๋Œ€ํ™” ์ปจํ…์ŠคํŠธ๋ฅผ ๊ด€๋ฆฌํ•˜๋Š” ํด๋ž˜์Šค"""
27
+
28
+ def __init__(self,
29
+ max_tokens: int = 4000,
30
+ max_turns: int = 20,
31
+ strategy: str = "sliding_window"):
32
+ """
33
+ Args:
34
+ max_tokens: ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
35
+ max_turns: ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
36
+ strategy: ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์ „๋žต ('sliding_window', 'priority_keep', 'circular')
37
+ """
38
+ self.max_tokens = max_tokens
39
+ self.max_turns = max_turns
40
+ self.strategy = strategy
41
+
42
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ (deque ์‚ฌ์šฉ์œผ๋กœ ํšจ์œจ์ ์ธ ์–‘๋ฐฉํ–ฅ ์ ‘๊ทผ)
43
+ self.conversation_history: deque = deque(maxlen=max_turns * 2)
44
+
45
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ
46
+ self.system_prompt = ""
47
+
48
+ # ์ปจํ…์ŠคํŠธ ํ†ต๊ณ„
49
+ self.total_tokens = 0
50
+ self.current_context_length = 0
51
+
52
+ # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™” ์„ค์ •
53
+ self.enable_memory_optimization = True
54
+ self.compression_threshold = 0.8 # 80% ๋„๋‹ฌ ์‹œ ์••์ถ• ์‹œ์ž‘
55
+
56
+ logger.info(f"๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: max_tokens={max_tokens}, strategy={strategy}")
57
+
58
+ def set_system_prompt(self, prompt: str):
59
+ """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •"""
60
+ self.system_prompt = prompt
61
+ logger.info(f"๐Ÿ“ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •: {len(prompt)} ๋ฌธ์ž")
62
+
63
+ def add_user_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
64
+ """์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
65
+ if not message_id:
66
+ message_id = f"user_{int(time.time() * 1000)}"
67
+
68
+ turn = ConversationTurn(
69
+ role="user",
70
+ content=content,
71
+ timestamp=time.time(),
72
+ message_id=message_id,
73
+ metadata=metadata or {}
74
+ )
75
+
76
+ self.conversation_history.append(turn)
77
+ self._update_context_stats()
78
+ self._optimize_context()
79
+
80
+ logger.info(f"๐Ÿ‘ค ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์ด {len(self.conversation_history)} ํ„ด)")
81
+ return message_id
82
+
83
+ def add_assistant_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
84
+ """์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
85
+ if not message_id:
86
+ message_id = f"assistant_{int(time.time() * 1000)}"
87
+
88
+ turn = ConversationTurn(
89
+ role="assistant",
90
+ content=content,
91
+ timestamp=time.time(),
92
+ message_id=message_id,
93
+ metadata=metadata or {}
94
+ )
95
+
96
+ self.conversation_history.append(turn)
97
+ self._update_context_stats()
98
+ self._optimize_context()
99
+
100
+ logger.info(f"๐Ÿค– ์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์ด {len(self.conversation_history)} ํ„ด)")
101
+ return message_id
102
+
103
+ def get_context(self, include_system: bool = True, max_length: Optional[int] = None) -> str:
104
+ """ํ˜„์žฌ ์ปจํ…์ŠคํŠธ๋ฅผ ๋ฌธ์ž์—ด๋กœ ๋ฐ˜ํ™˜"""
105
+ context_parts = []
106
+
107
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ํฌํ•จ
108
+ if include_system and self.system_prompt:
109
+ context_parts.append(f"<|im_start|>system\n{self.system_prompt}<|im_end|>")
110
+
111
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ํฌํ•จ
112
+ for turn in self.conversation_history:
113
+ if turn.role == "user":
114
+ context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
115
+ elif turn.role == "assistant":
116
+ context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
117
+
118
+ # ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์‹œ์ž‘ ํ† ํฐ ์ถ”๊ฐ€
119
+ context_parts.append("<|im_start|>assistant\n")
120
+
121
+ context = "\n".join(context_parts)
122
+
123
+ # ๊ธธ์ด ์ œํ•œ ์ ์šฉ
124
+ if max_length and len(context) > max_length:
125
+ context = self._truncate_context(context, max_length)
126
+
127
+ return context
128
+
129
+ def get_context_for_model(self, model_name: str = "default") -> str:
130
+ """๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”๋œ ์ปจํ…์ŠคํŠธ ๋ฐ˜ํ™˜"""
131
+ # ๋ชจ๋ธ๋ณ„ ํŠน๋ณ„ํ•œ ์ฒ˜๋ฆฌ (ํ•„์š”์‹œ ํ™•์žฅ)
132
+ if "kanana" in model_name.lower():
133
+ return self.get_context(include_system=True)
134
+ elif "llama" in model_name.lower():
135
+ # Llama ํ˜•์‹
136
+ return self._format_for_llama()
137
+ else:
138
+ return self.get_context(include_system=True)
139
+
140
+ def _format_for_llama(self) -> str:
141
+ """Llama ๋ชจ๋ธ์šฉ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜"""
142
+ context_parts = []
143
+
144
+ if self.system_prompt:
145
+ context_parts.append(f"[INST] {self.system_prompt} [/INST]")
146
+
147
+ for turn in self.conversation_history:
148
+ if turn.role == "user":
149
+ context_parts.append(f"[INST] {turn.content} [/INST]")
150
+ elif turn.role == "assistant":
151
+ context_parts.append(turn.content)
152
+
153
+ return "\n".join(context_parts)
154
+
155
+ def get_recent_context(self, turns: int = 5) -> str:
156
+ """์ตœ๊ทผ N๊ฐœ ํ„ด์˜ ์ปจํ…์ŠคํŠธ๋งŒ ๋ฐ˜ํ™˜"""
157
+ recent_turns = list(self.conversation_history)[-turns:]
158
+ context_parts = []
159
+
160
+ for turn in recent_turns:
161
+ if turn.role == "user":
162
+ context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
163
+ elif turn.role == "assistant":
164
+ context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
165
+
166
+ context_parts.append("<|im_start|>assistant\n")
167
+ return "\n".join(context_parts)
168
+
169
+ def get_context_summary(self) -> Dict[str, Any]:
170
+ """์ปจํ…์ŠคํŠธ ์š”์•ฝ ์ •๋ณด ๋ฐ˜ํ™˜"""
171
+ return {
172
+ "total_turns": len(self.conversation_history),
173
+ "user_messages": len([t for t in self.conversation_history if t.role == "user"]),
174
+ "assistant_messages": len([t for t in self.conversation_history if t.role == "assistant"]),
175
+ "estimated_tokens": self.total_tokens,
176
+ "context_length": self.current_context_length,
177
+ "memory_usage": len(self.conversation_history) / self.max_turns,
178
+ "oldest_message": self.conversation_history[0].timestamp if self.conversation_history else None,
179
+ "newest_message": self.conversation_history[-1].timestamp if self.conversation_history else None
180
+ }
181
+
182
+ def clear_context(self):
183
+ """์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”"""
184
+ self.conversation_history.clear()
185
+ self.total_tokens = 0
186
+ self.current_context_length = 0
187
+ logger.info("๐Ÿ—‘๏ธ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
188
+
189
+ def remove_message(self, message_id: str) -> bool:
190
+ """ํŠน์ • ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ"""
191
+ for i, turn in enumerate(self.conversation_history):
192
+ if turn.message_id == message_id:
193
+ removed_turn = self.conversation_history.pop(i)
194
+ self._update_context_stats()
195
+ logger.info(f"๐Ÿ—‘๏ธ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ: {message_id}")
196
+ return True
197
+ return False
198
+
199
+ def edit_message(self, message_id: str, new_content: str) -> bool:
200
+ """๋ฉ”์‹œ์ง€ ๋‚ด์šฉ ์ˆ˜์ •"""
201
+ for turn in self.conversation_history:
202
+ if turn.message_id == message_id:
203
+ turn.content = new_content
204
+ turn.timestamp = time.time()
205
+ self._update_context_stats()
206
+ logger.info(f"โœ๏ธ ๋ฉ”์‹œ์ง€ ์ˆ˜์ •: {message_id}")
207
+ return True
208
+ return False
209
+
210
+ def search_context(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
211
+ """์ปจํ…์ŠคํŠธ ๋‚ด์—์„œ ๊ฒ€์ƒ‰"""
212
+ results = []
213
+ query_lower = query.lower()
214
+
215
+ for turn in self.conversation_history:
216
+ if query_lower in turn.content.lower():
217
+ results.append({
218
+ "message_id": turn.message_id,
219
+ "role": turn.role,
220
+ "content": turn.content,
221
+ "timestamp": turn.timestamp,
222
+ "relevance_score": self._calculate_relevance(query, turn.content)
223
+ })
224
+
225
+ # ๊ด€๋ จ์„ฑ ์ ์ˆ˜๋กœ ์ •๋ ฌ
226
+ results.sort(key=lambda x: x["relevance_score"], reverse=True)
227
+ return results[:max_results]
228
+
229
+ def _calculate_relevance(self, query: str, content: str) -> float:
230
+ """๊ฐ„๋‹จํ•œ ๊ด€๋ จ์„ฑ ์ ์ˆ˜ ๊ณ„์‚ฐ"""
231
+ query_words = set(query.lower().split())
232
+ content_words = set(content.lower().split())
233
+
234
+ if not query_words:
235
+ return 0.0
236
+
237
+ intersection = query_words.intersection(content_words)
238
+ return len(intersection) / len(query_words)
239
+
240
+ def _update_context_stats(self):
241
+ """์ปจํ…์ŠคํŠธ ํ†ต๊ณ„ ์—…๋ฐ์ดํŠธ"""
242
+ self.current_context_length = len(self.get_context())
243
+ # ๊ฐ„๋‹จํ•œ ํ† ํฐ ์ถ”์ • (์‹ค์ œ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ ๊ถŒ์žฅ)
244
+ self.total_tokens = self.current_context_length // 4
245
+
246
+ def _optimize_context(self):
247
+ """์ปจํ…์ŠคํŠธ ์ตœ์ ํ™”"""
248
+ if not self.enable_memory_optimization:
249
+ return
250
+
251
+ # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์ด ์ž„๊ณ„๊ฐ’์„ ์ดˆ๊ณผํ•˜๋ฉด ์••์ถ• ์‹œ์ž‘
252
+ if len(self.conversation_history) / self.max_turns > self.compression_threshold:
253
+ self._compress_context()
254
+
255
+ def _compress_context(self):
256
+ """์ปจํ…์ŠคํŠธ ์••์ถ• (์ค‘์š”ํ•œ ๋ฉ”์‹œ์ง€ ์œ ์ง€)"""
257
+ if len(self.conversation_history) <= self.max_turns:
258
+ return
259
+
260
+ logger.info(f"๐Ÿ—œ๏ธ ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹œ์ž‘: {len(self.conversation_history)} โ†’ {self.max_turns}")
261
+
262
+ # ์ „๋žต์— ๋”ฐ๋ฅธ ์••์ถ•
263
+ if self.strategy == "sliding_window":
264
+ # ์Šฌ๋ผ์ด๋”ฉ ์œˆ๋„์šฐ: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
265
+ while len(self.conversation_history) > self.max_turns:
266
+ self.conversation_history.popleft()
267
+
268
+ elif self.strategy == "priority_keep":
269
+ # ์šฐ์„ ์ˆœ์œ„ ๊ธฐ๋ฐ˜: ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
270
+ # ์ฒซ ๋ฒˆ์งธ์™€ ๋งˆ์ง€๋ง‰ ๋ฉ”์‹œ์ง€๋Š” ์œ ์ง€
271
+ if len(self.conversation_history) > self.max_turns:
272
+ # ์ค‘๊ฐ„ ๋ฉ”์‹œ์ง€๋“ค ์ค‘ ์ผ๋ถ€ ์ œ๊ฑฐ
273
+ middle_start = self.max_turns // 2
274
+ middle_end = len(self.conversation_history) - self.max_turns // 2
275
+
276
+ # ์ค‘๊ฐ„ ๋ถ€๋ถ„์„ ์š”์•ฝ์œผ๋กœ ๋Œ€์ฒด
277
+ removed_turns = list(self.conversation_history)[middle_start:middle_end]
278
+ summary_content = f"[์ด์ „ {len(removed_turns)}๊ฐœ ๋ฉ”์‹œ์ง€ ์š”์•ฝ: {len(removed_turns)}๊ฐœ ๋Œ€ํ™” ํ„ด]"
279
+
280
+ # ์ค‘๊ฐ„ ๋ถ€๋ถ„ ์ œ๊ฑฐ
281
+ for _ in range(middle_end - middle_start):
282
+ self.conversation_history.pop(middle_start)
283
+
284
+ # ์š”์•ฝ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
285
+ summary_turn = ConversationTurn(
286
+ role="system",
287
+ content=summary_content,
288
+ timestamp=time.time(),
289
+ message_id=f"summary_{int(time.time() * 1000)}"
290
+ )
291
+ self.conversation_history.insert(middle_start, summary_turn)
292
+
293
+ elif self.strategy == "circular":
294
+ # ์ˆœํ™˜ ๋ฒ„ํผ: ๊ฐ€์žฅ ์˜ค๋ž˜๋œ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ
295
+ while len(self.conversation_history) > self.max_turns:
296
+ self.conversation_history.popleft()
297
+
298
+ self._update_context_stats()
299
+ logger.info(f"โœ… ์ปจํ…์ŠคํŠธ ์••์ถ• ์™„๋ฃŒ: {len(self.conversation_history)} ํ„ด")
300
+
301
+ def _truncate_context(self, context: str, max_length: int) -> str:
302
+ """์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ œํ•œ"""
303
+ if len(context) <= max_length:
304
+ return context
305
+
306
+ # ๊ฐ€์žฅ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€๋ถ€ํ„ฐ ์œ ์ง€
307
+ truncated_context = context[-max_length:]
308
+
309
+ # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„ ํ™•์ธ
310
+ if not truncated_context.startswith("<|im_start|>"):
311
+ # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„๋ฅผ ์ฐพ์•„์„œ ์ž๋ฅด๊ธฐ
312
+ start_idx = truncated_context.find("<|im_start|>")
313
+ if start_idx != -1:
314
+ truncated_context = truncated_context[start_idx:]
315
+
316
+ return truncated_context
317
+
318
+ def export_context(self, file_path: str = None) -> str:
319
+ """์ปจํ…์ŠคํŠธ๋ฅผ ํŒŒ์ผ๋กœ ๋‚ด๋ณด๋‚ด๊ธฐ"""
320
+ if not file_path:
321
+ file_path = f"context_export_{int(time.time())}.json"
322
+
323
+ export_data = {
324
+ "export_timestamp": time.time(),
325
+ "system_prompt": self.system_prompt,
326
+ "conversation_history": [
327
+ {
328
+ "role": turn.role,
329
+ "content": turn.content,
330
+ "timestamp": turn.timestamp,
331
+ "message_id": turn.message_id,
332
+ "metadata": turn.metadata
333
+ }
334
+ for turn in self.conversation_history
335
+ ],
336
+ "context_stats": self.get_context_summary()
337
+ }
338
+
339
+ with open(file_path, 'w', encoding='utf-8') as f:
340
+ json.dump(export_data, f, ensure_ascii=False, indent=2)
341
+
342
+ logger.info(f"๐Ÿ’พ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์™„๋ฃŒ: {file_path}")
343
+ return file_path
344
+
345
+ def import_context(self, file_path: str) -> bool:
346
+ """ํŒŒ์ผ์—์„œ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ"""
347
+ try:
348
+ with open(file_path, 'r', encoding='utf-8') as f:
349
+ import_data = json.load(f)
350
+
351
+ # ๊ธฐ์กด ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”
352
+ self.clear_context()
353
+
354
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋ณต์›
355
+ if "system_prompt" in import_data:
356
+ self.system_prompt = import_data["system_prompt"]
357
+
358
+ # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ๋ณต์›
359
+ if "conversation_history" in import_data:
360
+ for turn_data in import_data["conversation_history"]:
361
+ turn = ConversationTurn(
362
+ role=turn_data["role"],
363
+ content=turn_data["content"],
364
+ timestamp=turn_data["timestamp"],
365
+ message_id=turn_data["message_id"],
366
+ metadata=turn_data.get("metadata", {})
367
+ )
368
+ self.conversation_history.append(turn)
369
+
370
+ self._update_context_stats()
371
+ logger.info(f"๐Ÿ“ฅ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์™„๋ฃŒ: {file_path}")
372
+ return True
373
+
374
+ except Exception as e:
375
+ logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
376
+ return False
377
+
378
+ def get_memory_efficiency(self) -> Dict[str, float]:
379
+ """๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ์„ฑ ์ง€ํ‘œ ๋ฐ˜ํ™˜"""
380
+ return {
381
+ "context_utilization": len(self.conversation_history) / self.max_turns,
382
+ "token_efficiency": self.total_tokens / self.max_tokens if self.max_tokens > 0 else 0,
383
+ "compression_ratio": 1.0 - (len(self.conversation_history) / (self.max_turns * 2)),
384
+ "memory_fragmentation": self._calculate_fragmentation()
385
+ }
386
+
387
+ def _calculate_fragmentation(self) -> float:
388
+ """๋ฉ”๋ชจ๋ฆฌ ๋‹จํŽธํ™” ์ •๋„ ๊ณ„์‚ฐ"""
389
+ if len(self.conversation_history) <= 1:
390
+ return 0.0
391
+
392
+ # ์—ฐ์†๋œ ๋ฉ”์‹œ์ง€ ๊ฐ„์˜ ์‹œ๊ฐ„ ๊ฐ„๊ฒฉ์œผ๋กœ ๋‹จํŽธํ™” ๊ณ„์‚ฐ
393
+ timestamps = [turn.timestamp for turn in self.conversation_history]
394
+ intervals = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
395
+
396
+ if not intervals:
397
+ return 0.0
398
+
399
+ avg_interval = sum(intervals) / len(intervals)
400
+ variance = sum((x - avg_interval) ** 2 for x in intervals) / len(intervals)
401
+
402
+ # ์ •๊ทœํ™”๋œ ๋‹จํŽธํ™” ์ ์ˆ˜ (0-1)
403
+ return min(1.0, variance / (avg_interval ** 2) if avg_interval > 0 else 0.0)
404
+
405
+ # ์ „์—ญ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค
406
+ context_manager = ContextManager()
407
+
408
+ def get_context_manager() -> ContextManager:
409
+ """์ „์—ญ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ˜ํ™˜"""
410
+ return context_manager
lily_llm_core/document_processor.py CHANGED
@@ -90,12 +90,12 @@ class DocumentProcessor:
90
  logger.error(f"โŒ OCR ๋ฆฌ๋” ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
91
  self.ocr_reader = None
92
 
93
- # ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •
94
- if formula_ocr_engine in ['mathpix', 'latexocr']:
95
  try:
96
- from .formula_extractor import image_to_latex_mathpix, run_latex_ocr
97
  self.formula_extractor_available = True
98
- logger.info(f"โœ… ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •: {formula_ocr_engine}")
99
  except ImportError:
100
  self.formula_extractor_available = False
101
  logger.warning(f"โš ๏ธ ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ {formula_ocr_engine} ์‚ฌ์šฉ ๋ถˆ๊ฐ€, EasyOCR๋กœ ๋Œ€์ฒด")
 
90
  logger.error(f"โŒ OCR ๋ฆฌ๋” ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
91
  self.ocr_reader = None
92
 
93
+ # ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ • (LaTeX-OCR ๋น„ํ™œ์„ฑํ™”๋จ)
94
+ if formula_ocr_engine in ['mathpix']: # 'latexocr' ์ œ๊ฑฐ
95
  try:
96
+ from .formula_extractor import image_to_latex_mathpix # run_latex_ocr ์ œ๊ฑฐ
97
  self.formula_extractor_available = True
98
+ logger.info(f"โœ… ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ ์„ค์ •: {formula_ocr_engine} (LaTeX-OCR ๋น„ํ™œ์„ฑํ™”๋จ)")
99
  except ImportError:
100
  self.formula_extractor_available = False
101
  logger.warning(f"โš ๏ธ ์ˆ˜์‹ ์ถ”์ถœ ์—”์ง„ {formula_ocr_engine} ์‚ฌ์šฉ ๋ถˆ๊ฐ€, EasyOCR๋กœ ๋Œ€์ฒด")
lily_llm_core/hybrid_rag_processor.py CHANGED
@@ -14,9 +14,9 @@ from .rag_processor import rag_processor
14
  from .image_rag_processor import image_rag_processor
15
  from .latex_rag_processor import latex_rag_processor
16
 
17
- # ์ƒˆ๋กœ์šด LaTeX-OCR + FAISS ์‹œ์Šคํ…œ
18
- from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
19
- from latex_ocr_faiss_simple import LatexOCRFAISSSimple
20
 
21
  # ๋กœ๊น… ์„ค์ •
22
  logging.basicConfig(level=logging.INFO)
@@ -31,19 +31,16 @@ class HybridRAGProcessor:
31
  self.image_rag = image_rag_processor
32
  self.latex_rag = latex_rag_processor
33
 
34
- # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ
35
  self.latex_ocr_faiss_simple = None
36
  self.latex_ocr_faiss_integrated = None
37
- self._init_latex_ocr_faiss()
38
 
39
  def _init_latex_ocr_faiss(self):
40
- """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”"""
41
- try:
42
- self.latex_ocr_faiss_simple = LatexOCRFAISSSimple()
43
- self.latex_ocr_faiss_integrated = LatexOCRFAISSIntegrated()
44
- logger.info("โœ… LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
45
- except Exception as e:
46
- logger.error(f"โŒ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
47
 
48
  def process_document_hybrid(self, file_path: str, user_id: str, document_id: str) -> Dict[str, Any]:
49
  """๋ฌธ์„œ๋ฅผ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ๋กœ ์ฒ˜๋ฆฌ"""
@@ -92,17 +89,12 @@ class HybridRAGProcessor:
92
  logger.error(f"โŒ LaTeX ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
93
  results["latex_processing"] = {"success": False, "error": str(e)}
94
 
95
- # 4. LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ (์ƒˆ๋กœ์šด)
96
- try:
97
- if self.latex_ocr_faiss_integrated:
98
- latex_ocr_result = self.latex_ocr_faiss_integrated.process_pdf_with_latex(file_path, user_id)
99
- results["latex_ocr_faiss_processing"] = latex_ocr_result
100
- logger.info(f"โœ… LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {latex_ocr_result.get('latex_count', 0)}๊ฐœ ์ˆ˜์‹")
101
- else:
102
- results["latex_ocr_faiss_processing"] = {"success": False, "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ์ดˆ๊ธฐํ™”๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}
103
- except Exception as e:
104
- logger.error(f"โŒ LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
105
- results["latex_ocr_faiss_processing"] = {"success": False, "error": str(e)}
106
 
107
  # ์ „์ฒด ์„ฑ๊ณต ์—ฌ๋ถ€ ํŒ๋‹จ
108
  success_count = sum(1 for key, value in results.items()
@@ -123,7 +115,7 @@ class HybridRAGProcessor:
123
 
124
  def generate_hybrid_response(self, query: str, user_id: str, document_id: str,
125
  use_text: bool = True, use_image: bool = True,
126
- use_latex: bool = True, use_latex_ocr: bool = True,
127
  max_length: Optional[int] = None,
128
  temperature: Optional[float] = None,
129
  top_p: Optional[float] = None,
@@ -182,22 +174,15 @@ class HybridRAGProcessor:
182
  logger.error(f"โŒ LaTeX RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
183
  responses["latex"] = {"success": False, "error": str(e)}
184
 
185
- # 4. LaTeX-OCR + FAISS ์‘๋‹ต (์ƒˆ๋กœ์šด)
186
- if use_latex_ocr and self.latex_ocr_faiss_integrated:
187
- try:
188
- latex_ocr_response = self.latex_ocr_faiss_integrated.search_formulas(
189
- query, user_id, document_id, k=5
190
- )
191
- responses["latex_ocr_faiss"] = latex_ocr_response
192
- if latex_ocr_response.get("success"):
193
- context = "\n".join([f"์ˆ˜์‹: {result['formula']} (์œ ์‚ฌ๋„: {result['similarity']:.3f})"
194
- for result in latex_ocr_response.get('results', [])])
195
- all_contexts.append(f"[LaTeX-OCR+FAISS] {context}")
196
- all_sources.extend(latex_ocr_response.get('results', []))
197
- logger.info("โœ… LaTeX-OCR + FAISS ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ")
198
- except Exception as e:
199
- logger.error(f"โŒ LaTeX-OCR + FAISS ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
200
- responses["latex_ocr_faiss"] = {"success": False, "error": str(e)}
201
 
202
  # ํ†ตํ•ฉ ์‘๋‹ต ์ƒ์„ฑ
203
  success_count = sum(1 for response in responses.values() if response.get('success', False))
@@ -269,6 +254,12 @@ class HybridRAGProcessor:
269
  except Exception as e:
270
  logger.error(f"LaTeX ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
271
 
 
 
 
 
 
 
272
  return {
273
  "success": True,
274
  "document_id": document_id,
 
14
  from .image_rag_processor import image_rag_processor
15
  from .latex_rag_processor import latex_rag_processor
16
 
17
+ # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ๋น„ํ™œ์„ฑํ™” (ModuleNotFoundError ํ•ด๊ฒฐ)
18
+ # from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
19
+ # from latex_ocr_faiss_simple import LatexOCRFAISSSimple
20
 
21
  # ๋กœ๊น… ์„ค์ •
22
  logging.basicConfig(level=logging.INFO)
 
31
  self.image_rag = image_rag_processor
32
  self.latex_rag = latex_rag_processor
33
 
34
+ # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ๋น„ํ™œ์„ฑํ™”
35
  self.latex_ocr_faiss_simple = None
36
  self.latex_ocr_faiss_integrated = None
37
+ # self._init_latex_ocr_faiss() # ๋น„ํ™œ์„ฑํ™”
38
 
39
  def _init_latex_ocr_faiss(self):
40
+ """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” (๋น„ํ™œ์„ฑํ™”๋จ)"""
41
+ # LaTeX-OCR ๊ธฐ๋Šฅ์ด ์™„์ „ํžˆ ๋น„ํ™œ์„ฑํ™”๋จ
42
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
43
+ pass
 
 
 
44
 
45
  def process_document_hybrid(self, file_path: str, user_id: str, document_id: str) -> Dict[str, Any]:
46
  """๋ฌธ์„œ๋ฅผ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ๋กœ ์ฒ˜๋ฆฌ"""
 
89
  logger.error(f"โŒ LaTeX ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
90
  results["latex_processing"] = {"success": False, "error": str(e)}
91
 
92
+ # 4. LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)
93
+ results["latex_ocr_faiss_processing"] = {
94
+ "success": False,
95
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
96
+ }
97
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ๊ฑด๋„ˆ๋œ€ (๋น„ํ™œ์„ฑํ™”๋จ)")
 
 
 
 
 
98
 
99
  # ์ „์ฒด ์„ฑ๊ณต ์—ฌ๋ถ€ ํŒ๋‹จ
100
  success_count = sum(1 for key, value in results.items()
 
115
 
116
  def generate_hybrid_response(self, query: str, user_id: str, document_id: str,
117
  use_text: bool = True, use_image: bool = True,
118
+ use_latex: bool = True, use_latex_ocr: bool = False, # ๊ธฐ๋ณธ๊ฐ’์„ False๋กœ ๋ณ€๊ฒฝ
119
  max_length: Optional[int] = None,
120
  temperature: Optional[float] = None,
121
  top_p: Optional[float] = None,
 
174
  logger.error(f"โŒ LaTeX RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
175
  responses["latex"] = {"success": False, "error": str(e)}
176
 
177
+ # 4. LaTeX-OCR + FAISS ์‘๋‹ต (๋น„ํ™œ์„ฑํ™”๋จ)
178
+ if use_latex_ocr:
179
+ logger.warning("โš ๏ธ LaTeX-OCR + FAISS ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
180
+ responses["latex_ocr_faiss"] = {
181
+ "success": False,
182
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
183
+ }
184
+ else:
185
+ logger.info("โš ๏ธ LaTeX-OCR + FAISS ์‘๋‹ต ๊ฑด๋„ˆ๋œ€ (๋น„ํ™œ์„ฑํ™”๋จ)")
 
 
 
 
 
 
 
186
 
187
  # ํ†ตํ•ฉ ์‘๋‹ต ์ƒ์„ฑ
188
  success_count = sum(1 for response in responses.values() if response.get('success', False))
 
254
  except Exception as e:
255
  logger.error(f"LaTeX ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
256
 
257
+ # LaTeX-OCR + FAISS ์ •๋ณด๋Š” ๋น„ํ™œ์„ฑํ™”๋จ
258
+ info["latex_ocr_faiss_info"] = {
259
+ "success": False,
260
+ "error": "LaTeX-OCR + FAISS ์‹œ์Šคํ…œ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
261
+ }
262
+
263
  return {
264
  "success": True,
265
  "document_id": document_id,
lily_llm_core/latex_ocr_processor.py CHANGED
@@ -20,10 +20,11 @@ except ImportError:
20
  EASYOCR_AVAILABLE = False
21
  easyocr = None
22
 
23
- # LaTeX-OCR imports
24
  try:
25
- from pix2tex.cli import LatexOCR
26
- LATEXOCR_AVAILABLE = True
 
27
  except ImportError:
28
  LATEXOCR_AVAILABLE = False
29
  LatexOCR = None
 
20
  EASYOCR_AVAILABLE = False
21
  easyocr = None
22
 
23
+ # LaTeX-OCR imports (๋น„ํ™œ์„ฑํ™”๋จ)
24
  try:
25
+ # from pix2tex.cli import LatexOCR # ๋น„ํ™œ์„ฑํ™”๋จ
26
+ LATEXOCR_AVAILABLE = False
27
+ LatexOCR = None
28
  except ImportError:
29
  LATEXOCR_AVAILABLE = False
30
  LatexOCR = None
lily_llm_core/latex_ocr_subprocess.py CHANGED
@@ -1,227 +1,37 @@
1
  #!/usr/bin/env python3
2
  """
3
- LaTeX-OCR Subprocess Processor
4
  LaTeX-OCR์„ ๋ณ„๋„ ํ”„๋กœ์„ธ์Šค๋กœ ์‹คํ–‰ํ•˜์—ฌ ๋ฒ„์ „ ์ถฉ๋Œ์„ ๋ฐฉ์ง€ํ•ฉ๋‹ˆ๋‹ค.
5
  """
6
 
7
- import subprocess
8
- import json
9
- import tempfile
10
- import os
11
  import logging
12
  from typing import List, Dict, Any, Optional
13
  from PIL import Image
14
- import base64
15
- import io
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
  class LaTeXOCRSubprocessProcessor:
20
- """LaTeX-OCR์„ subprocess๋กœ ์‹คํ–‰ํ•˜๋Š” ํ”„๋กœ์„ธ์„œ"""
21
 
22
  def __init__(self):
23
- self.latex_ocr_script = self._create_latex_ocr_script()
24
 
25
- def _create_latex_ocr_script(self) -> str:
26
- """LaTeX-OCR ์‹คํ–‰ ์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"""
27
- script_content = '''
28
- import sys
29
- import os
30
- import json
31
- from PIL import Image
32
- import io
33
-
34
- # LaTeX-OCR ํ™˜๊ฒฝ ์„ค์ •
35
- latex_ocr_env = os.path.join(os.path.dirname(__file__), 'lily_llm_utils', 'LaTeX-OCR')
36
- if latex_ocr_env not in sys.path:
37
- sys.path.insert(0, latex_ocr_env)
38
-
39
- try:
40
- from pix2tex.cli import LatexOCR
41
- import torch
42
-
43
- def process_image(image_path):
44
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ถ”์ถœ"""
45
- try:
46
- # LaTeX-OCR ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
47
- model = LatexOCR()
48
-
49
- # ์ด๋ฏธ์ง€ ๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ
50
- img = Image.open(image_path)
51
- result = model(img)
52
-
53
- return {
54
- "success": True,
55
- "latex": result,
56
- "error": None
57
- }
58
- except Exception as e:
59
- return {
60
- "success": False,
61
- "latex": None,
62
- "error": str(e)
63
- }
64
-
65
- # ๋ช…๋ นํ–‰ ์ธ์ž ์ฒ˜๋ฆฌ
66
- if len(sys.argv) > 1:
67
- image_path = sys.argv[1]
68
- result = process_image(image_path)
69
- print(json.dumps(result, ensure_ascii=False))
70
- else:
71
- print(json.dumps({"success": False, "latex": None, "error": "No image path provided"}))
72
-
73
- except ImportError as e:
74
- print(json.dumps({"success": False, "latex": None, "error": f"Import error: {str(e)}"}))
75
- except Exception as e:
76
- print(json.dumps({"success": False, "latex": None, "error": f"Unexpected error: {str(e)}"}))
77
- '''
78
-
79
- # ์ž„์‹œ ์Šคํฌ๋ฆฝํŠธ ํŒŒ์ผ ์ƒ์„ฑ
80
- script_path = os.path.join(tempfile.gettempdir(), 'latex_ocr_processor.py')
81
- with open(script_path, 'w', encoding='utf-8') as f:
82
- f.write(script_content)
83
-
84
- return script_path
85
-
86
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
87
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
88
- try:
89
- # ์ด๋ฏธ์ง€๋ฅผ ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
90
- with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
91
- image.save(tmp_file.name, 'PNG')
92
- tmp_path = tmp_file.name
93
-
94
- # subprocess๋กœ LaTeX-OCR ์‹คํ–‰
95
- import sys
96
- result = subprocess.run(
97
- [sys.executable, self.latex_ocr_script, tmp_path],
98
- capture_output=True,
99
- text=True,
100
- timeout=30 # 30์ดˆ ํƒ€์ž„์•„์›ƒ
101
- )
102
-
103
- # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
104
- os.unlink(tmp_path)
105
-
106
- if result.returncode == 0:
107
- try:
108
- output = json.loads(result.stdout.strip())
109
- if output.get("success"):
110
- return output.get("latex")
111
- else:
112
- logger.error(f"LaTeX-OCR ์˜ค๋ฅ˜: {output.get('error')}")
113
- return None
114
- except json.JSONDecodeError:
115
- logger.error(f"JSON ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {result.stdout}")
116
- return None
117
- else:
118
- logger.error(f"LaTeX-OCR ์‹คํ–‰ ์˜ค๋ฅ˜: {result.stderr}")
119
- return None
120
-
121
- except subprocess.TimeoutExpired:
122
- logger.error("LaTeX-OCR ์‹คํ–‰ ์‹œ๊ฐ„ ์ดˆ๊ณผ")
123
- return None
124
- except Exception as e:
125
- logger.error(f"LaTeX-OCR ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
126
- return None
127
 
128
- def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
129
- """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
130
- try:
131
- import fitz # PyMuPDF
132
-
133
- doc = fitz.open(pdf_path)
134
- results = {
135
- "success": True,
136
- "pages": [],
137
- "total_pages": len(doc),
138
- "latex_count": 0
139
- }
140
-
141
- for page_num in range(len(doc)):
142
- page = doc.load_page(page_num)
143
-
144
- # ํŽ˜์ด์ง€์—์„œ ์ด๋ฏธ์ง€ ์ถ”์ถœ
145
- image_list = page.get_images()
146
- page_results = {
147
- "page_num": page_num + 1,
148
- "images": [],
149
- "latex_formulas": []
150
- }
151
-
152
- for img_index, img in enumerate(image_list):
153
- try:
154
- # ์ด๋ฏธ์ง€ ์ถ”์ถœ
155
- xref = img[0]
156
- pix = fitz.Pixmap(doc, xref)
157
-
158
- if pix.n - pix.alpha < 4: # CMYK: ์ด๋ฏธ์ง€ ๋ณ€ํ™˜
159
- pix = fitz.Pixmap(fitz.csRGB, pix)
160
-
161
- img_data = pix.tobytes("png")
162
- image = Image.open(io.BytesIO(img_data))
163
-
164
- # LaTeX ์ถ”์ถœ ์‹œ๋„
165
- latex_result = self.extract_latex_from_image(image)
166
-
167
- page_results["images"].append({
168
- "index": img_index,
169
- "size": image.size,
170
- "latex": latex_result
171
- })
172
-
173
- if latex_result:
174
- page_results["latex_formulas"].append(latex_result)
175
- results["latex_count"] += 1
176
-
177
- except Exception as e:
178
- logger.error(f"ํŽ˜์ด์ง€ {page_num + 1} ์ด๋ฏธ์ง€ {img_index} ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
179
- continue
180
-
181
- results["pages"].append(page_results)
182
-
183
- doc.close()
184
- return results
185
-
186
- except Exception as e:
187
- logger.error(f"PDF LaTeX ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
188
- return {
189
- "success": False,
190
- "error": str(e),
191
- "pages": [],
192
- "total_pages": 0,
193
- "latex_count": 0
194
- }
195
 
196
- def process_image_file(self, image_path: str) -> Dict[str, Any]:
197
- """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ"""
198
- try:
199
- image = Image.open(image_path)
200
- latex_result = self.extract_latex_from_image(image)
201
-
202
- return {
203
- "success": True,
204
- "image_path": image_path,
205
- "image_size": image.size,
206
- "latex": latex_result
207
- }
208
-
209
- except Exception as e:
210
- logger.error(f"์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
211
- return {
212
- "success": False,
213
- "error": str(e),
214
- "image_path": image_path,
215
- "latex": None
216
- }
217
 
218
  def cleanup(self):
219
- """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ"""
220
- try:
221
- if os.path.exists(self.latex_ocr_script):
222
- os.unlink(self.latex_ocr_script)
223
- except Exception as e:
224
- logger.error(f"์Šคํฌ๋ฆฝํŠธ ์ •๋ฆฌ ์˜ค๋ฅ˜: {e}")
225
-
226
- # ์ „์—ญ ์ธ์Šคํ„ด์Šค
227
- latex_ocr_processor = LaTeXOCRSubprocessProcessor()
 
1
  #!/usr/bin/env python3
2
  """
3
+ LaTeX-OCR Subprocess Processor (๋น„ํ™œ์„ฑํ™”๋จ)
4
  LaTeX-OCR์„ ๋ณ„๋„ ํ”„๋กœ์„ธ์Šค๋กœ ์‹คํ–‰ํ•˜์—ฌ ๋ฒ„์ „ ์ถฉ๋Œ์„ ๋ฐฉ์ง€ํ•ฉ๋‹ˆ๋‹ค.
5
  """
6
 
 
 
 
 
7
  import logging
8
  from typing import List, Dict, Any, Optional
9
  from PIL import Image
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
  class LaTeXOCRSubprocessProcessor:
14
+ """LaTeX-OCR์„ subprocess๋กœ ์‹คํ–‰ํ•˜๋Š” ํ”„๋กœ์„ธ์„œ (๋น„ํ™œ์„ฑํ™”๋จ)"""
15
 
16
  def __init__(self):
17
+ logger.warning("โš ๏ธ LaTeX-OCR Subprocess ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
20
+ """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
21
+ logger.warning("โš ๏ธ LaTeX-OCR ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
22
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ def extract_latex_from_pdf(self, pdf_path: str) -> List[Dict[str, Any]]:
25
+ """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
26
+ logger.warning("โš ๏ธ LaTeX-OCR PDF ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
27
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ def process_batch_images(self, image_paths: List[str]) -> List[Dict[str, Any]]:
30
+ """๋ฐฐ์น˜ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
31
+ logger.warning("โš ๏ธ LaTeX-OCR ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
32
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def cleanup(self):
35
+ """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
36
+ logger.info("โš ๏ธ LaTeX-OCR Subprocess ๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)")
37
+ pass
 
 
 
 
 
 
lily_llm_core/latex_ocr_subprocess_v2.py CHANGED
@@ -1,303 +1,52 @@
1
  #!/usr/bin/env python3
2
  """
3
- LaTeX-OCR Subprocess Processor v2
4
  ๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜์—ฌ ์™„์ „ํžˆ ๊ฒฉ๋ฆฌ๋œ LaTeX-OCR ์‹คํ–‰
5
  """
6
 
7
- import subprocess
8
- import json
9
- import tempfile
10
- import os
11
  import logging
12
  from typing import List, Dict, Any, Optional
13
  from PIL import Image
14
- import base64
15
- import io
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
  class LaTeXOCRSubprocessV2Processor:
20
- """๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜๋Š” LaTeX-OCR ํ”„๋กœ์„ธ์„œ"""
21
 
22
  def __init__(self, venv_path: str = None):
23
  """
24
  Args:
25
- venv_path: LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ
26
  """
27
- if venv_path is None:
28
- # ๊ธฐ๋ณธ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ ์„ค์ •
29
- current_dir = os.path.dirname(os.path.abspath(__file__))
30
- venv_path = os.path.join(current_dir, '..', 'latex_ocr_env')
31
 
32
- self.venv_path = venv_path
33
- self.python_executable = self._get_venv_python()
34
- self.latex_ocr_script = self._create_latex_ocr_script()
35
-
36
- def _get_venv_python(self) -> str:
37
- """๊ฐ€์ƒํ™˜๊ฒฝ์˜ Python ์‹คํ–‰ ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜"""
38
- if os.name == 'nt': # Windows
39
- python_path = os.path.join(self.venv_path, 'Scripts', 'python.exe')
40
- else: # Unix/Linux
41
- python_path = os.path.join(self.venv_path, 'bin', 'python')
42
-
43
- if not os.path.exists(python_path):
44
- logger.warning(f"๊ฐ€์ƒํ™˜๊ฒฝ Python์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {python_path}")
45
- logger.info("์‹œ์Šคํ…œ Python์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.")
46
- return None
47
-
48
- return python_path
49
-
50
- def _create_latex_ocr_script(self) -> str:
51
- """LaTeX-OCR ์‹คํ–‰ ์Šคํฌ๋ฆฝํŠธ ์ƒ์„ฑ"""
52
- script_content = '''
53
- import sys
54
- import os
55
- import json
56
- from PIL import Image
57
- import io
58
-
59
- try:
60
- # LaTeX-OCR ํ™˜๊ฒฝ ์„ค์ •
61
- latex_ocr_env = os.path.join(os.path.dirname(__file__), 'lily_llm_utils', 'LaTeX-OCR')
62
- if latex_ocr_env not in sys.path:
63
- sys.path.insert(0, latex_ocr_env)
64
-
65
- from pix2tex.cli import LatexOCR
66
- import torch
67
-
68
- def process_image(image_path):
69
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ถ”์ถœ"""
70
- try:
71
- # LaTeX-OCR ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
72
- model = LatexOCR()
73
-
74
- # ์ด๋ฏธ์ง€ ๋กœ๋“œ ๋ฐ ์ฒ˜๋ฆฌ
75
- img = Image.open(image_path)
76
- result = model(img)
77
-
78
- return {
79
- "success": True,
80
- "latex": result,
81
- "error": None
82
- }
83
- except Exception as e:
84
- return {
85
- "success": False,
86
- "latex": None,
87
- "error": str(e)
88
- }
89
-
90
- # ๋ช…๋ นํ–‰ ์ธ์ž ์ฒ˜๋ฆฌ
91
- if len(sys.argv) > 1:
92
- image_path = sys.argv[1]
93
- result = process_image(image_path)
94
- print(json.dumps(result, ensure_ascii=False))
95
- else:
96
- print(json.dumps({"success": False, "latex": None, "error": "No image path provided"}))
97
-
98
- except ImportError as e:
99
- print(json.dumps({"success": False, "latex": None, "error": f"Import error: {str(e)}"}))
100
- except Exception as e:
101
- print(json.dumps({"success": False, "latex": None, "error": f"Unexpected error: {str(e)}"}))
102
- '''
103
-
104
- # ์ž„์‹œ ์Šคํฌ๋ฆฝํŠธ ํŒŒ์ผ ์ƒ์„ฑ
105
- script_path = os.path.join(tempfile.gettempdir(), 'latex_ocr_processor_v2.py')
106
- with open(script_path, 'w', encoding='utf-8') as f:
107
- f.write(script_content)
108
-
109
- return script_path
110
-
111
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
112
- """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
113
- try:
114
- # ์ด๋ฏธ์ง€๋ฅผ ์ž„์‹œ ํŒŒ์ผ๋กœ ์ €์žฅ
115
- with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
116
- image.save(tmp_file.name, 'PNG')
117
- tmp_path = tmp_file.name
118
-
119
- # subprocess๋กœ LaTeX-OCR ์‹คํ–‰
120
- import sys
121
- cmd = [self.python_executable or sys.executable, self.latex_ocr_script, tmp_path]
122
-
123
- result = subprocess.run(
124
- cmd,
125
- capture_output=True,
126
- text=True,
127
- timeout=30 # 30์ดˆ ํƒ€์ž„์•„์›ƒ
128
- )
129
-
130
- # ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ
131
- os.unlink(tmp_path)
132
-
133
- if result.returncode == 0:
134
- try:
135
- output = json.loads(result.stdout.strip())
136
- if output.get("success"):
137
- return output.get("latex")
138
- else:
139
- logger.error(f"LaTeX-OCR ์˜ค๋ฅ˜: {output.get('error')}")
140
- return None
141
- except json.JSONDecodeError:
142
- logger.error(f"JSON ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {result.stdout}")
143
- return None
144
- else:
145
- logger.error(f"LaTeX-OCR ์‹คํ–‰ ์˜ค๋ฅ˜: {result.stderr}")
146
- return None
147
-
148
- except subprocess.TimeoutExpired:
149
- logger.error("LaTeX-OCR ์‹คํ–‰ ์‹œ๊ฐ„ ์ดˆ๊ณผ")
150
- return None
151
- except Exception as e:
152
- logger.error(f"LaTeX-OCR ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
153
- return None
154
 
155
  def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
156
- """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ"""
157
- try:
158
- import fitz # PyMuPDF
159
-
160
- doc = fitz.open(pdf_path)
161
- results = {
162
- "success": True,
163
- "pages": [],
164
- "total_pages": len(doc),
165
- "latex_count": 0
166
- }
167
-
168
- for page_num in range(len(doc)):
169
- page = doc.load_page(page_num)
170
-
171
- # ํŽ˜์ด์ง€์—์„œ ์ด๋ฏธ์ง€ ์ถ”์ถœ
172
- image_list = page.get_images()
173
- page_results = {
174
- "page_num": page_num + 1,
175
- "images": [],
176
- "latex_formulas": []
177
- }
178
-
179
- for img_index, img in enumerate(image_list):
180
- try:
181
- # ์ด๋ฏธ์ง€ ์ถ”์ถœ
182
- xref = img[0]
183
- pix = fitz.Pixmap(doc, xref)
184
-
185
- if pix.n - pix.alpha < 4: # CMYK: ์ด๋ฏธ์ง€ ๋ณ€ํ™˜
186
- pix = fitz.Pixmap(fitz.csRGB, pix)
187
-
188
- img_data = pix.tobytes("png")
189
- image = Image.open(io.BytesIO(img_data))
190
-
191
- # LaTeX ์ถ”์ถœ ์‹œ๋„
192
- latex_result = self.extract_latex_from_image(image)
193
-
194
- page_results["images"].append({
195
- "index": img_index,
196
- "size": image.size,
197
- "latex": latex_result
198
- })
199
-
200
- if latex_result:
201
- page_results["latex_formulas"].append(latex_result)
202
- results["latex_count"] += 1
203
-
204
- except Exception as e:
205
- logger.error(f"ํŽ˜์ด์ง€ {page_num + 1} ์ด๋ฏธ์ง€ {img_index} ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
206
- continue
207
-
208
- results["pages"].append(page_results)
209
-
210
- doc.close()
211
- return results
212
-
213
- except Exception as e:
214
- logger.error(f"PDF LaTeX ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
215
- return {
216
- "success": False,
217
- "error": str(e),
218
- "pages": [],
219
- "total_pages": 0,
220
- "latex_count": 0
221
- }
222
 
223
  def process_image_file(self, image_path: str) -> Dict[str, Any]:
224
- """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ"""
225
- try:
226
- image = Image.open(image_path)
227
- latex_result = self.extract_latex_from_image(image)
228
-
229
- return {
230
- "success": True,
231
- "image_path": image_path,
232
- "image_size": image.size,
233
- "latex": latex_result
234
- }
235
-
236
- except Exception as e:
237
- logger.error(f"์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
238
- return {
239
- "success": False,
240
- "error": str(e),
241
- "image_path": image_path,
242
- "latex": None
243
- }
244
-
245
- def create_venv(self) -> bool:
246
- """LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ"""
247
- try:
248
- import venv
249
-
250
- if os.path.exists(self.venv_path):
251
- logger.info(f"๊ฐ€์ƒํ™˜๊ฒฝ์ด ์ด๋ฏธ ์กด์žฌํ•ฉ๋‹ˆ๋‹ค: {self.venv_path}")
252
- return True
253
-
254
- logger.info(f"LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ ์ค‘: {self.venv_path}")
255
- venv.create(self.venv_path, with_pip=True)
256
-
257
- # LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜
258
- self._install_latex_ocr_dependencies()
259
-
260
- return True
261
-
262
- except Exception as e:
263
- logger.error(f"๊ฐ€์ƒํ™˜๊ฒฝ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
264
- return False
265
-
266
- def _install_latex_ocr_dependencies(self):
267
- """LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜"""
268
- try:
269
- # pip ์—…๊ทธ๋ ˆ์ด๋“œ
270
- subprocess.run([
271
- self.python_executable, "-m", "pip", "install", "--upgrade", "pip"
272
- ], check=True)
273
-
274
- # LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜
275
- dependencies = [
276
- "torch==2.0.1",
277
- "transformers==4.30.0",
278
- "timm==0.6.13",
279
- "numpy==1.24.3",
280
- "Pillow",
281
- "requests"
282
- ]
283
-
284
- for dep in dependencies:
285
- subprocess.run([
286
- self.python_executable, "-m", "pip", "install", dep
287
- ], check=True)
288
-
289
- logger.info("LaTeX-OCR ์˜์กด์„ฑ ์„ค์น˜ ์™„๋ฃŒ")
290
-
291
- except Exception as e:
292
- logger.error(f"์˜์กด์„ฑ ์„ค์น˜ ์˜ค๋ฅ˜: {e}")
293
 
294
  def cleanup(self):
295
- """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ"""
296
- try:
297
- if os.path.exists(self.latex_ocr_script):
298
- os.unlink(self.latex_ocr_script)
299
- except Exception as e:
300
- logger.error(f"์Šคํฌ๋ฆฝํŠธ ์ •๋ฆฌ ์˜ค๋ฅ˜: {e}")
301
-
302
- # ์ „์—ญ ์ธ์Šคํ„ด์Šค
303
- latex_ocr_processor_v2 = LaTeXOCRSubprocessV2Processor()
 
1
  #!/usr/bin/env python3
2
  """
3
+ LaTeX-OCR Subprocess Processor v2 (๋น„ํ™œ์„ฑํ™”๋จ)
4
  ๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜์—ฌ ์™„์ „ํžˆ ๊ฒฉ๋ฆฌ๋œ LaTeX-OCR ์‹คํ–‰
5
  """
6
 
 
 
 
 
7
  import logging
8
  from typing import List, Dict, Any, Optional
9
  from PIL import Image
 
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
13
  class LaTeXOCRSubprocessV2Processor:
14
+ """๋ณ„๋„ ๊ฐ€์ƒํ™˜๊ฒฝ์„ ์‚ฌ์šฉํ•˜๋Š” LaTeX-OCR ํ”„๋กœ์„ธ์„œ (๋น„ํ™œ์„ฑํ™”๋จ)"""
15
 
16
  def __init__(self, venv_path: str = None):
17
  """
18
  Args:
19
+ venv_path: LaTeX-OCR ์ „์šฉ ๊ฐ€์ƒํ™˜๊ฒฝ ๊ฒฝ๋กœ (์‚ฌ์šฉ๋˜์ง€ ์•Š์Œ)
20
  """
21
+ logger.warning("โš ๏ธ LaTeX-OCR Subprocess V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def extract_latex_from_image(self, image: Image.Image) -> Optional[str]:
24
+ """์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
25
+ logger.warning("โš ๏ธ LaTeX-OCR V2 ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
26
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def extract_latex_from_pdf(self, pdf_path: str) -> Dict[str, Any]:
29
+ """PDF์—์„œ LaTeX ์ˆ˜์‹ ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
30
+ logger.warning("โš ๏ธ LaTeX-OCR V2 PDF ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
31
+ return {
32
+ "success": False,
33
+ "error": "LaTeX-OCR V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
34
+ "pages": [],
35
+ "total_pages": 0,
36
+ "latex_count": 0
37
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  def process_image_file(self, image_path: str) -> Dict[str, Any]:
40
+ """์ด๋ฏธ์ง€ ํŒŒ์ผ์—์„œ LaTeX ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
41
+ logger.warning("โš ๏ธ LaTeX-OCR V2 ์ด๋ฏธ์ง€ ํŒŒ์ผ ์ฒ˜๋ฆฌ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
42
+ return {
43
+ "success": False,
44
+ "error": "LaTeX-OCR V2 ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
45
+ "image_path": image_path,
46
+ "latex": None
47
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  def cleanup(self):
50
+ """๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)"""
51
+ logger.info("โš ๏ธ LaTeX-OCR V2 Subprocess ๋ฆฌ์†Œ์Šค ์ •๋ฆฌ (๋น„ํ™œ์„ฑํ™”๋จ)")
52
+ pass
 
 
 
 
 
 
lily_llm_core/latex_rag_processor.py CHANGED
@@ -8,7 +8,7 @@ import logging
8
  from typing import List, Dict, Any, Optional
9
  from pathlib import Path
10
 
11
- from .latex_ocr_processor import latex_ocr_processor
12
  from .vector_store_manager import vector_store_manager
13
 
14
  logger = logging.getLogger(__name__)
@@ -24,241 +24,61 @@ class LaTeXRAGProcessor:
24
  self.max_search_results = max_search_results
25
 
26
  def process_and_store_latex_document(self, user_id: str, document_id: str, file_path: str) -> Dict[str, Any]:
27
- """LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ"""
28
- try:
29
- logger.info(f"๐Ÿงฎ LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘: {file_path}")
30
-
31
- # ํŒŒ์ผ ํ™•์žฅ์ž ํ™•์ธ
32
- file_ext = Path(file_path).suffix.lower()
33
-
34
- if file_ext == '.pdf':
35
- # PDF LaTeX-OCR ์ฒ˜๋ฆฌ
36
- documents = latex_ocr_processor.process_pdf_with_latex_ocr(file_path)
37
- elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
38
- # ์ด๋ฏธ์ง€ ํŒŒ์ผ LaTeX-OCR ์ฒ˜๋ฆฌ
39
- documents = latex_ocr_processor.process_image_file_with_latex(file_path)
40
- else:
41
- return {
42
- "success": False,
43
- "error": f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค: {file_ext}"
44
- }
45
-
46
- if not documents:
47
- return {
48
- "success": False,
49
- "error": "์ด๋ฏธ์ง€์—์„œ ํ…์ŠคํŠธ๋‚˜ ์ˆ˜์‹์„ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
50
- }
51
-
52
- # ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ
53
- success = vector_store_manager.add_documents(user_id, document_id, documents)
54
-
55
- if success:
56
- # ํ†ต๊ณ„ ์ •๋ณด ๊ณ„์‚ฐ
57
- total_text_length = sum(doc.metadata.get('text_length', 0) for doc in documents)
58
- total_latex_length = sum(doc.metadata.get('latex_length', 0) for doc in documents)
59
- has_latex_count = sum(1 for doc in documents if doc.metadata.get('has_latex', False))
60
-
61
- return {
62
- "success": True,
63
- "document_id": document_id,
64
- "chunks": len(documents),
65
- "message": "LaTeX-OCR ๋ฌธ์„œ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์ฒ˜๋ฆฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
66
- "processing_type": "latex_ocr",
67
- "total_text_length": total_text_length,
68
- "total_latex_length": total_latex_length,
69
- "has_latex_count": has_latex_count
70
- }
71
- else:
72
- return {
73
- "success": False,
74
- "error": "๋ฒกํ„ฐ ์Šคํ† ์–ด ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
75
- }
76
-
77
- except Exception as e:
78
- logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
79
- return {
80
- "success": False,
81
- "error": str(e)
82
- }
83
 
84
  def generate_latex_rag_response(self, user_id: str, document_id: str, query: str) -> Dict[str, Any]:
85
- """LaTeX-OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ"""
86
- try:
87
- logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ๊ฒ€์ƒ‰ ์‹œ์ž‘: {query}")
88
-
89
- # ์œ ์‚ฌํ•œ ๋ฌธ์„œ ๊ฒ€์ƒ‰
90
- similar_docs = vector_store_manager.search_similar(
91
- user_id, document_id, query, k=self.max_search_results
92
- )
93
-
94
- if not similar_docs:
95
- return {
96
- "success": False,
97
- "response": "๊ด€๋ จ๋œ LaTeX-OCR ๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.",
98
- "context": "",
99
- "sources": [],
100
- "search_results": 0,
101
- "processing_type": "latex_ocr"
102
- }
103
-
104
- # ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ
105
- context = self._build_context(similar_docs)
106
-
107
- # ์†Œ์Šค ์ •๋ณด ์ถ”์ถœ
108
- sources = self._extract_sources(similar_docs)
109
-
110
- # ์‘๋‹ต ์ƒ์„ฑ
111
- response = self._generate_latex_response(query, context, similar_docs)
112
-
113
- logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ: {len(similar_docs)}๊ฐœ ๊ฒฐ๊ณผ")
114
-
115
- return {
116
- "success": True,
117
- "response": response,
118
- "context": context,
119
- "sources": sources,
120
- "search_results": len(similar_docs),
121
- "processing_type": "latex_ocr",
122
- "has_images": True,
123
- "has_latex": any(doc.metadata.get('has_latex', False) for doc in similar_docs),
124
- "image_count": len(similar_docs)
125
- }
126
-
127
- except Exception as e:
128
- logger.error(f"โŒ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
129
- return {
130
- "success": False,
131
- "response": f"LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
132
- "context": "",
133
- "sources": [],
134
- "search_results": 0,
135
- "processing_type": "latex_ocr"
136
- }
137
 
138
  def _build_context(self, documents: List) -> str:
139
- """๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ"""
140
- if not documents:
141
- return ""
142
-
143
- context_parts = []
144
- for doc in documents:
145
- content = doc.page_content.strip()
146
- if content:
147
- # ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ •๋ณด ์ถ”๊ฐ€
148
- metadata_info = []
149
- if 'page' in doc.metadata:
150
- metadata_info.append(f"ํŽ˜์ด์ง€ {doc.metadata['page']}")
151
- if 'image_name' in doc.metadata:
152
- metadata_info.append(f"์ด๋ฏธ์ง€: {doc.metadata['image_name']}")
153
- if doc.metadata.get('has_latex', False):
154
- metadata_info.append("LaTeX ์ˆ˜์‹ ํฌํ•จ")
155
-
156
- if metadata_info:
157
- context_parts.append(f"[{' | '.join(metadata_info)}] {content}")
158
- else:
159
- context_parts.append(content)
160
-
161
- return "\n\n".join(context_parts)
162
 
163
  def _extract_sources(self, documents: List) -> List[Dict[str, Any]]:
164
- """์†Œ์Šค ์ •๋ณด ์ถ”์ถœ"""
165
- sources = []
166
-
167
- for doc in documents:
168
- source_info = {
169
- "content": doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content,
170
- "page": doc.metadata.get('page', 'N/A'),
171
- "image_name": doc.metadata.get('image_name', 'N/A'),
172
- "processing_type": doc.metadata.get('processing_type', 'latex_ocr'),
173
- "has_text": doc.metadata.get('has_text', False),
174
- "has_latex": doc.metadata.get('has_latex', False),
175
- "text_length": doc.metadata.get('text_length', 0),
176
- "latex_length": doc.metadata.get('latex_length', 0)
177
- }
178
- sources.append(source_info)
179
-
180
- return sources
181
 
182
  def _generate_latex_response(self, query: str, context: str, documents: List) -> str:
183
- """LaTeX-OCR ์‘๋‹ต ์ƒ์„ฑ"""
184
- if not context:
185
- return "์ด๋ฏธ์ง€์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
186
-
187
- # LaTeX ์ˆ˜์‹ ํฌํ•จ ์—ฌ๋ถ€ ํ™•์ธ
188
- has_latex = any(doc.metadata.get('has_latex', False) for doc in documents)
189
-
190
- response = f"LaTeX-OCR ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋‹ต๋ณ€๋“œ๋ฆฝ๋‹ˆ๋‹ค:\n\n"
191
- response += f"๐Ÿ“‹ ๊ฒ€์ƒ‰๋œ ๋‚ด์šฉ:\n{context}\n\n"
192
-
193
- if has_latex:
194
- response += f"๐Ÿงฎ ์ˆ˜ํ•™ ์ˆ˜์‹ ๋ถ„์„: ์ด๋ฏธ์ง€์—์„œ LaTeX ์ˆ˜์‹์„ ์ถ”์ถœํ•˜์—ฌ ๋ถ„์„ํ–ˆ์Šต๋‹ˆ๋‹ค.\n"
195
- response += f"๐Ÿ’ก ๋ถ„์„: ํ…์ŠคํŠธ์™€ ์ˆ˜ํ•™ ์ˆ˜์‹์„ ๋ชจ๋‘ ๊ณ ๋ คํ•œ ์ข…ํ•ฉ์ ์ธ ๋ถ„์„ ๊ฒฐ๊ณผ์ž…๋‹ˆ๋‹ค."
196
- else:
197
- response += f"๐Ÿ’ก ๋ถ„์„: ์ด๋ฏธ์ง€์—์„œ ์ถ”์ถœ๋œ ํ…์ŠคํŠธ๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ œ๊ณตํ–ˆ์Šต๋‹ˆ๋‹ค."
198
-
199
- return response
200
 
201
  def get_latex_document_info(self, user_id: str, document_id: str) -> Dict[str, Any]:
202
- """LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ"""
203
- try:
204
- store_path = vector_store_manager.get_document_store_path(user_id, document_id)
205
-
206
- if not store_path.exists():
207
- return {
208
- "success": False,
209
- "error": "๋ฌธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
210
- }
211
-
212
- # ๋ฒกํ„ฐ ์Šคํ† ์–ด ๋กœ๋“œ
213
- vector_store = vector_store_manager.load_vector_store(store_path)
214
-
215
- if not vector_store:
216
- return {
217
- "success": False,
218
- "error": "๋ฒกํ„ฐ ์Šคํ† ์–ด๋ฅผ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
219
- }
220
-
221
- # ๋ฌธ์„œ ์ •๋ณด ์ˆ˜์ง‘
222
- documents = []
223
- total_text_length = 0
224
- total_latex_length = 0
225
- has_latex_count = 0
226
-
227
- for doc_id in vector_store.index_to_docstore_id:
228
- doc = vector_store.docstore._dict[doc_id]
229
- if doc.metadata.get('processing_type') == 'latex_ocr':
230
- documents.append({
231
- "page": doc.metadata.get('page', 'N/A'),
232
- "image_name": doc.metadata.get('image_name', 'N/A'),
233
- "content_preview": doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content,
234
- "has_text": doc.metadata.get('has_text', False),
235
- "has_latex": doc.metadata.get('has_latex', False),
236
- "text_length": doc.metadata.get('text_length', 0),
237
- "latex_length": doc.metadata.get('latex_length', 0)
238
- })
239
-
240
- total_text_length += doc.metadata.get('text_length', 0)
241
- total_latex_length += doc.metadata.get('latex_length', 0)
242
- if doc.metadata.get('has_latex', False):
243
- has_latex_count += 1
244
-
245
- return {
246
- "success": True,
247
- "document_id": document_id,
248
- "total_chunks": len(documents),
249
- "processing_type": "latex_ocr",
250
- "total_text_length": total_text_length,
251
- "total_latex_length": total_latex_length,
252
- "has_latex_count": has_latex_count,
253
- "documents": documents
254
- }
255
-
256
- except Exception as e:
257
- logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
258
- return {
259
- "success": False,
260
- "error": str(e)
261
- }
262
 
263
  # ์ „์—ญ ์ธ์Šคํ„ด์Šค
264
  latex_rag_processor = LaTeXRAGProcessor()
 
8
  from typing import List, Dict, Any, Optional
9
  from pathlib import Path
10
 
11
+ # from .latex_ocr_processor import latex_ocr_processor # ๋น„ํ™œ์„ฑํ™”๋จ
12
  from .vector_store_manager import vector_store_manager
13
 
14
  logger = logging.getLogger(__name__)
 
24
  self.max_search_results = max_search_results
25
 
26
  def process_and_store_latex_document(self, user_id: str, document_id: str, file_path: str) -> Dict[str, Any]:
27
+ """LaTeX-OCR ๋ฌธ์„œ ์ฒ˜๋ฆฌ ๋ฐ ๋ฒกํ„ฐ ์Šคํ† ์–ด์— ์ €์žฅ (๋น„ํ™œ์„ฑํ™”๋จ)"""
28
+ logger.warning("โš ๏ธ LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
29
+ return {
30
+ "success": False,
31
+ "error": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
32
+ "document_id": document_id,
33
+ "chunks": 0,
34
+ "message": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
35
+ "processing_type": "latex_ocr_disabled",
36
+ "total_text_length": 0,
37
+ "total_latex_length": 0,
38
+ "has_latex_count": 0
39
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def generate_latex_rag_response(self, user_id: str, document_id: str, query: str) -> Dict[str, Any]:
42
+ """LaTeX-OCR ๊ธฐ๋ฐ˜ RAG ์‘๋‹ต ์ƒ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
43
+ logger.warning("โš ๏ธ LaTeX-OCR RAG ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
44
+ return {
45
+ "success": False,
46
+ "response": "LaTeX-OCR RAG ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
47
+ "context": "",
48
+ "sources": [],
49
+ "search_results": 0,
50
+ "processing_type": "latex_ocr_disabled",
51
+ "has_images": False,
52
+ "has_latex": False,
53
+ "image_count": 0
54
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def _build_context(self, documents: List) -> str:
57
+ """๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ๊ตฌ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
58
+ return "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def _extract_sources(self, documents: List) -> List[Dict[str, Any]]:
61
+ """์†Œ์Šค ์ •๋ณด ์ถ”์ถœ (๋น„ํ™œ์„ฑํ™”๋จ)"""
62
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def _generate_latex_response(self, query: str, context: str, documents: List) -> str:
65
+ """LaTeX-OCR ์‘๋‹ต ์ƒ์„ฑ (๋น„ํ™œ์„ฑํ™”๋จ)"""
66
+ return "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def get_latex_document_info(self, user_id: str, document_id: str) -> Dict[str, Any]:
69
+ """LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ (๋น„ํ™œ์„ฑํ™”๋จ)"""
70
+ logger.warning("โš ๏ธ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
71
+ return {
72
+ "success": False,
73
+ "error": "LaTeX-OCR ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
74
+ "document_id": document_id,
75
+ "total_chunks": 0,
76
+ "processing_type": "latex_ocr_disabled",
77
+ "total_text_length": 0,
78
+ "total_latex_length": 0,
79
+ "has_latex_count": 0,
80
+ "documents": []
81
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
  # ์ „์—ญ ์ธ์Šคํ„ด์Šค
84
  latex_rag_processor = LaTeXRAGProcessor()
lily_llm_core/lora_manager.py ADDED
@@ -0,0 +1,521 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LoRA/QLoRA ๊ด€๋ฆฌ์ž (LoRA Manager)
4
+ LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋กœ๋“œํ•˜๊ณ  ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import json
10
+ import torch
11
+ from typing import Dict, Any, Optional, List, Union
12
+ from pathlib import Path
13
+ import warnings
14
+ import time
15
+
16
+ # logger๋ฅผ ๋จผ์ € ์ •์˜
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # PEFT ๊ด€๋ จ import (์„ค์น˜๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ๊ฒฝ๊ณ )
20
+ try:
21
+ logger.info("๐Ÿ” PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
22
+ from peft import (
23
+ LoraConfig,
24
+ get_peft_model,
25
+ PeftModel,
26
+ TaskType,
27
+ prepare_model_for_kbit_training
28
+ )
29
+ from peft.utils import get_peft_model_state_dict
30
+ PEFT_AVAILABLE = True
31
+ logger.info("โœ… PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
32
+ except ImportError as e:
33
+ PEFT_AVAILABLE = False
34
+ logger.error(f"โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
35
+ logger.error(f"โŒ Python ๊ฒฝ๋กœ: {os.environ.get('PYTHONPATH', 'Not set')}")
36
+ logger.error(f"โŒ ํ˜„์žฌ ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ: {os.getcwd()}")
37
+ warnings.warn(f"PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. LoRA ๊ธฐ๋Šฅ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
38
+
39
+ # Transformers ๊ด€๋ จ import
40
+ try:
41
+ logger.info("๐Ÿ” Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
42
+ from transformers import (
43
+ AutoModelForCausalLM,
44
+ AutoTokenizer,
45
+ BitsAndBytesConfig,
46
+ TrainingArguments,
47
+ Trainer,
48
+ DataCollatorForLanguageModeling
49
+ )
50
+ TRANSFORMERS_AVAILABLE = True
51
+ logger.info("โœ… Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
52
+ except ImportError as e:
53
+ TRANSFORMERS_AVAILABLE = False
54
+ logger.error(f"โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
55
+ warnings.warn(f"Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
56
+
57
+ class LoRAManager:
58
+ """LoRA/QLoRA ๋ชจ๋ธ ๊ด€๋ฆฌ ํด๋ž˜์Šค"""
59
+
60
+ def __init__(self, base_model_path: str = None, device: str = "auto"):
61
+ """
62
+ Args:
63
+ base_model_path: ๊ธฐ๋ณธ ๋ชจ๋ธ ๊ฒฝ๋กœ
64
+ device: ์‚ฌ์šฉํ•  ๋””๋ฐ”์ด์Šค ('auto', 'cpu', 'cuda', 'mps')
65
+ """
66
+ logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™” ์‹œ์ž‘: PEFT_AVAILABLE={PEFT_AVAILABLE}, TRANSFORMERS_AVAILABLE={TRANSFORMERS_AVAILABLE}")
67
+
68
+ if not PEFT_AVAILABLE:
69
+ logger.error("โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
70
+ logger.error("โŒ pip install peft๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
71
+ logger.error("โŒ ๊ฐ€์ƒํ™˜๊ฒฝ์ด ํ™œ์„ฑํ™”๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
72
+ raise ImportError("PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install peft๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
73
+
74
+ if not TRANSFORMERS_AVAILABLE:
75
+ logger.error("โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
76
+ logger.error("โŒ pip install transformers๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
77
+ raise ImportError("Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install transformers๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")
78
+
79
+ self.base_model_path = base_model_path
80
+ self.device = self._get_device(device)
81
+
82
+ # ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ €
83
+ self.base_model = None
84
+ self.tokenizer = None
85
+ self.lora_model = None
86
+
87
+ # LoRA ์„ค์ •
88
+ self.lora_config = None
89
+ self.current_adapter_name = None
90
+
91
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ๊ฒฝ๋กœ
92
+ self.adapters_dir = Path("lora_adapters")
93
+ self.adapters_dir.mkdir(exist_ok=True)
94
+
95
+ # ๋กœ๋“œ๋œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก
96
+ self.loaded_adapters = {}
97
+
98
+ logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: device={self.device}")
99
+
100
+ def _get_device(self, device: str) -> str:
101
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋””๋ฐ”์ด์Šค ํ™•์ธ"""
102
+ if device == "auto":
103
+ if torch.cuda.is_available():
104
+ return "cuda"
105
+ elif torch.backends.mps.is_available():
106
+ return "mps"
107
+ else:
108
+ return "cpu"
109
+ return device
110
+
111
+ def load_base_model(self, model_path: str = None, model_type: str = "causal_lm") -> bool:
112
+ """๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ"""
113
+ try:
114
+ model_path = model_path or self.base_model_path
115
+ if not model_path:
116
+ raise ValueError("๋ชจ๋ธ ๊ฒฝ๋กœ๊ฐ€ ์ง€์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
117
+
118
+ logger.info(f"๐Ÿ“ฅ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘: {model_path}")
119
+
120
+ # ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
121
+ self.tokenizer = AutoTokenizer.from_pretrained(
122
+ model_path,
123
+ trust_remote_code=True,
124
+ local_files_only=os.path.exists(model_path)
125
+ )
126
+
127
+ # ํŒจ๋”ฉ ํ† ํฐ ์„ค์ •
128
+ if self.tokenizer.pad_token is None:
129
+ self.tokenizer.pad_token = self.tokenizer.eos_token
130
+
131
+ # ๋ชจ๋ธ ๋กœ๋“œ
132
+ if model_type == "causal_lm":
133
+ self.base_model = AutoModelForCausalLM.from_pretrained(
134
+ model_path,
135
+ trust_remote_code=True,
136
+ local_files_only=os.path.exists(model_path),
137
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
138
+ device_map="auto" if self.device == "cuda" else None
139
+ )
140
+ else:
141
+ raise ValueError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
142
+
143
+ # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
144
+ if self.device != "cuda": # cuda๋Š” device_map="auto" ์‚ฌ์šฉ
145
+ self.base_model = self.base_model.to(self.device)
146
+
147
+ self.base_model_path = model_path
148
+ logger.info(f"โœ… ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ: {model_path}")
149
+ return True
150
+
151
+ except Exception as e:
152
+ logger.error(f"โŒ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
153
+ return False
154
+
155
+ def create_lora_config(self,
156
+ r: int = 16,
157
+ lora_alpha: int = 32,
158
+ target_modules: List[str] = None,
159
+ lora_dropout: float = 0.1,
160
+ bias: str = "none",
161
+ task_type: str = "CAUSAL_LM") -> LoraConfig:
162
+ """LoRA ์„ค์ • ์ƒ์„ฑ"""
163
+ if target_modules is None:
164
+ # ์ผ๋ฐ˜์ ์ธ ๋ชจ๋ธ ์•„ํ‚คํ…์ฒ˜์— ๋Œ€ํ•œ ๊ธฐ๋ณธ๊ฐ’
165
+ target_modules = ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
166
+
167
+ # TaskType ๋ณ€ํ™˜
168
+ task_type_map = {
169
+ "CAUSAL_LM": TaskType.CAUSAL_LM,
170
+ "SEQ_2_SEQ_LM": TaskType.SEQ_2_SEQ_LM,
171
+ "SEQUENCE_CLASSIFICATION": TaskType.SEQUENCE_CLASSIFICATION,
172
+ "TOKEN_CLASSIFICATION": TaskType.TOKEN_CLASSIFICATION,
173
+ "QUESTION_ANSWERING": TaskType.QUESTION_ANSWERING
174
+ }
175
+
176
+ task_type_enum = task_type_map.get(task_type, TaskType.CAUSAL_LM)
177
+
178
+ self.lora_config = LoraConfig(
179
+ r=r,
180
+ lora_alpha=lora_alpha,
181
+ target_modules=target_modules,
182
+ lora_dropout=lora_dropout,
183
+ bias=bias,
184
+ task_type=task_type_enum
185
+ )
186
+
187
+ logger.info(f"๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ: r={r}, alpha={lora_alpha}, target_modules={target_modules}")
188
+ return self.lora_config
189
+
190
+ def apply_lora_to_model(self, adapter_name: str = "default") -> bool:
191
+ """LoRA๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ์— ์ ์šฉ"""
192
+ try:
193
+ if self.base_model is None:
194
+ raise ValueError("๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
195
+
196
+ if self.lora_config is None:
197
+ raise ValueError("LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
198
+
199
+ logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘: {adapter_name}")
200
+
201
+ # LoRA ๋ชจ๋ธ ์ƒ์„ฑ
202
+ self.lora_model = get_peft_model(self.base_model, self.lora_config)
203
+
204
+ # ์–ด๋Œ‘ํ„ฐ ์ด๋ฆ„ ์„ค์ •
205
+ self.current_adapter_name = adapter_name
206
+
207
+ # ํ›ˆ๋ จ ๋ชจ๋“œ๋กœ ์„ค์ •
208
+ self.lora_model.train()
209
+
210
+ # ๋ชจ๋ธ ์ •๋ณด ์ถœ๋ ฅ
211
+ self.lora_model.print_trainable_parameters()
212
+
213
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: {adapter_name}")
214
+ return True
215
+
216
+ except Exception as e:
217
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
218
+ return False
219
+
220
+ def load_lora_adapter(self, adapter_path: str, adapter_name: str = None) -> bool:
221
+ """์ €์žฅ๋œ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ"""
222
+ try:
223
+ if not os.path.exists(adapter_path):
224
+ raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_path}")
225
+
226
+ if adapter_name is None:
227
+ adapter_name = Path(adapter_path).stem
228
+
229
+ logger.info(f"๐Ÿ“ฅ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹œ์ž‘: {adapter_path}")
230
+
231
+ # ๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ๋กœ๋“œ
232
+ if self.base_model is None:
233
+ # ์–ด๋Œ‘ํ„ฐ ์„ค์ • ํŒŒ์ผ์—์„œ ๊ธฐ๋ณธ ๋ชจ๋ธ ๊ฒฝ๋กœ ํ™•์ธ
234
+ config_path = os.path.join(adapter_path, "adapter_config.json")
235
+ if os.path.exists(config_path):
236
+ with open(config_path, 'r') as f:
237
+ config = json.load(f)
238
+ base_model_path = config.get("base_model_name_or_path")
239
+ if base_model_path:
240
+ self.load_base_model(base_model_path)
241
+
242
+ # LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ
243
+ self.lora_model = PeftModel.from_pretrained(
244
+ self.base_model,
245
+ adapter_path,
246
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
247
+ )
248
+
249
+ # ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
250
+ if self.device != "cuda":
251
+ self.lora_model = self.lora_model.to(self.device)
252
+
253
+ self.current_adapter_name = adapter_name
254
+ self.loaded_adapters[adapter_name] = adapter_path
255
+
256
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์™„๋ฃŒ: {adapter_name}")
257
+ return True
258
+
259
+ except Exception as e:
260
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
261
+ return False
262
+
263
+ def save_lora_adapter(self, adapter_name: str = None, output_dir: str = None) -> bool:
264
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ"""
265
+ try:
266
+ if self.lora_model is None:
267
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
268
+
269
+ adapter_name = adapter_name or self.current_adapter_name or "default"
270
+ output_dir = output_dir or str(self.adapters_dir / adapter_name)
271
+
272
+ logger.info(f"๐Ÿ’พ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹œ์ž‘: {adapter_name} -> {output_dir}")
273
+
274
+ # ์–ด๋Œ‘ํ„ฐ ์ €์žฅ
275
+ self.lora_model.save_pretrained(output_dir)
276
+
277
+ # ํ† ํฌ๋‚˜์ด์ €๋„ ์ €์žฅ
278
+ if self.tokenizer:
279
+ self.tokenizer.save_pretrained(output_dir)
280
+
281
+ # ์–ด๋Œ‘ํ„ฐ ์ •๋ณด ์ €์žฅ
282
+ adapter_info = {
283
+ "adapter_name": adapter_name,
284
+ "base_model": self.base_model_path,
285
+ "lora_config": self.lora_config.to_dict() if self.lora_config else None,
286
+ "created_at": str(torch.tensor(time.time())),
287
+ "device": self.device
288
+ }
289
+
290
+ with open(os.path.join(output_dir, "adapter_info.json"), 'w') as f:
291
+ json.dump(adapter_info, f, indent=2)
292
+
293
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์™„๋ฃŒ: {output_dir}")
294
+ return True
295
+
296
+ except Exception as e:
297
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
298
+ return False
299
+
300
+ def merge_lora_with_base(self, output_path: str = None) -> bool:
301
+ """LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ"""
302
+ try:
303
+ if self.lora_model is None:
304
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
305
+
306
+ output_path = output_path or f"{self.base_model_path}_merged"
307
+
308
+ logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹œ์ž‘: {output_path}")
309
+
310
+ # ๋ณ‘ํ•ฉ๋œ ๋ชจ๋ธ ์ƒ์„ฑ
311
+ merged_model = self.lora_model.merge_and_unload()
312
+
313
+ # ๋ณ‘ํ•ฉ๋œ ๋ชจ๋ธ ์ €์žฅ
314
+ merged_model.save_pretrained(output_path)
315
+
316
+ # ํ† ํฌ๋‚˜์ด์ €๋„ ์ €์žฅ
317
+ if self.tokenizer:
318
+ self.tokenizer.save_pretrained(output_path)
319
+
320
+ logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์™„๋ฃŒ: {output_path}")
321
+ return True
322
+
323
+ except Exception as e:
324
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
325
+ return False
326
+
327
+ def list_available_adapters(self) -> List[Dict[str, Any]]:
328
+ """์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก ๋ฐ˜ํ™˜"""
329
+ adapters = []
330
+
331
+ for adapter_dir in self.adapters_dir.iterdir():
332
+ if adapter_dir.is_dir():
333
+ config_path = adapter_dir / "adapter_config.json"
334
+ info_path = adapter_dir / "adapter_info.json"
335
+
336
+ adapter_info = {
337
+ "name": adapter_dir.name,
338
+ "path": str(adapter_dir),
339
+ "config_exists": config_path.exists(),
340
+ "info_exists": info_path.exists()
341
+ }
342
+
343
+ # ์–ด๋Œ‘ํ„ฐ ์ •๋ณด ๋กœ๋“œ
344
+ if info_path.exists():
345
+ try:
346
+ with open(info_path, 'r') as f:
347
+ info = json.load(f)
348
+ adapter_info.update(info)
349
+ except Exception as e:
350
+ logger.warning(f"์–ด๋Œ‘ํ„ฐ ์ •๋ณด ๋กœ๋“œ ์‹คํŒจ: {e}")
351
+
352
+ adapters.append(adapter_info)
353
+
354
+ return adapters
355
+
356
+ def get_adapter_stats(self) -> Dict[str, Any]:
357
+ """์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ ์ •๋ณด ๋ฐ˜ํ™˜"""
358
+ if self.lora_model is None:
359
+ return {"error": "LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}
360
+
361
+ try:
362
+ # ํ›ˆ๋ จ ๊ฐ€๋Šฅํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ ์ˆ˜
363
+ trainable_params = 0
364
+ all_param = 0
365
+
366
+ for param in self.lora_model.parameters():
367
+ all_param += param.numel()
368
+ if param.requires_grad:
369
+ trainable_params += param.numel()
370
+
371
+ return {
372
+ "adapter_name": self.current_adapter_name,
373
+ "trainable_params": trainable_params,
374
+ "all_params": all_param,
375
+ "trainable_ratio": trainable_params / all_param if all_param > 0 else 0,
376
+ "device": self.device,
377
+ "model_type": type(self.lora_model).__name__
378
+ }
379
+
380
+ except Exception as e:
381
+ logger.error(f"์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ ์ˆ˜์ง‘ ์‹คํŒจ: {e}")
382
+ return {"error": str(e)}
383
+
384
+ def switch_adapter(self, adapter_name: str) -> bool:
385
+ """๋‹ค๋ฅธ ์–ด๋Œ‘ํ„ฐ๋กœ ์ „ํ™˜"""
386
+ try:
387
+ if adapter_name not in self.loaded_adapters:
388
+ # ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ
389
+ adapter_path = self.adapters_dir / adapter_name
390
+ if not adapter_path.exists():
391
+ raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_name}")
392
+
393
+ return self.load_lora_adapter(str(adapter_path), adapter_name)
394
+ else:
395
+ # ์ด๋ฏธ ๋กœ๋“œ๋œ ์–ด๋Œ‘ํ„ฐ ์‚ฌ์šฉ
396
+ self.current_adapter_name = adapter_name
397
+ logger.info(f"๐Ÿ”„ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜: {adapter_name}")
398
+ return True
399
+
400
+ except Exception as e:
401
+ logger.error(f"โŒ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
402
+ return False
403
+
404
+ def unload_adapter(self) -> bool:
405
+ """LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ"""
406
+ try:
407
+ if self.lora_model is None:
408
+ return True
409
+
410
+ logger.info("๐Ÿ—‘๏ธ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹œ์ž‘")
411
+
412
+ # ์–ด๋Œ‘ํ„ฐ ์ œ๊ฑฐ
413
+ self.lora_model = None
414
+ self.current_adapter_name = None
415
+ self.lora_config = None
416
+
417
+ logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
418
+ return True
419
+
420
+ except Exception as e:
421
+ logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
422
+ return False
423
+
424
+ def generate_text(self, prompt: str, max_length: int = 100, temperature: float = 0.7) -> str:
425
+ """LoRA ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ"""
426
+ try:
427
+ if self.lora_model is None:
428
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
429
+
430
+ if self.tokenizer is None:
431
+ raise ValueError("ํ† ํฌ๋‚˜์ด์ €๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
432
+
433
+ # ์ž…๋ ฅ ํ† ํฌ๋‚˜์ด์ง•
434
+ inputs = self.tokenizer(prompt, return_tensors="pt")
435
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
436
+
437
+ # ์ถ”๋ก  ๋ชจ๋“œ๋กœ ์„ค์ •
438
+ self.lora_model.eval()
439
+
440
+ with torch.no_grad():
441
+ outputs = self.lora_model.generate(
442
+ **inputs,
443
+ max_new_tokens=max_length,
444
+ temperature=temperature,
445
+ do_sample=True,
446
+ pad_token_id=self.tokenizer.eos_token_id
447
+ )
448
+
449
+ # ์‘๋‹ต ๋””์ฝ”๋”ฉ
450
+ response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
451
+
452
+ # ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ
453
+ if response.startswith(prompt):
454
+ response = response[len(prompt):].strip()
455
+
456
+ return response
457
+
458
+ except Exception as e:
459
+ logger.error(f"โŒ ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
460
+ return f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
461
+
462
+ def prepare_for_training(self, training_args: TrainingArguments = None) -> bool:
463
+ """ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„"""
464
+ try:
465
+ if self.lora_model is None:
466
+ raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
467
+
468
+ logger.info("๐Ÿ”ง ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์‹œ์ž‘")
469
+
470
+ # ๊ธฐ๋ณธ ํ›ˆ๋ จ ์ธ์ˆ˜
471
+ if training_args is None:
472
+ training_args = TrainingArguments(
473
+ output_dir="./lora_training_output",
474
+ num_train_epochs=3,
475
+ per_device_train_batch_size=4,
476
+ gradient_accumulation_steps=4,
477
+ learning_rate=2e-4,
478
+ warmup_steps=100,
479
+ logging_steps=10,
480
+ save_steps=500,
481
+ eval_steps=500,
482
+ evaluation_strategy="steps",
483
+ save_strategy="steps",
484
+ load_best_model_at_end=True,
485
+ metric_for_best_model="eval_loss",
486
+ greater_is_better=False,
487
+ fp16=torch.cuda.is_available(),
488
+ dataloader_pin_memory=False,
489
+ )
490
+
491
+ # ํ›ˆ๋ จ ๋ชจ๋“œ๋กœ ์„ค์ •
492
+ self.lora_model.train()
493
+
494
+ # ๊ทธ๋ž˜๋””์–ธํŠธ ์ฒดํฌํฌ์ธํŒ… ํ™œ์„ฑํ™” (๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ)
495
+ self.lora_model.gradient_checkpointing_enable()
496
+
497
+ # ๊ทธ๋ž˜๋””์–ธํŠธ ํด๋ฆฌํ•‘ ์„ค์ •
498
+ self.lora_model.enable_input_require_grads()
499
+
500
+ logger.info("โœ… ํ›ˆ๋ จ๏ฟฝ๏ฟฝ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์™„๋ฃŒ")
501
+ return True
502
+
503
+ except Exception as e:
504
+ logger.error(f"โŒ ํ›ˆ๋ จ ์ค€๋น„ ์‹คํŒจ: {e}")
505
+ return False
506
+
507
# Module-level singleton: the shared LoRA manager is created only when both
# optional dependency stacks imported successfully; any failure degrades to
# None instead of breaking module import.
def _build_global_lora_manager() -> Optional["LoRAManager"]:
    """Construct the shared LoRAManager, returning None when unavailable."""
    try:
        if PEFT_AVAILABLE and TRANSFORMERS_AVAILABLE:
            manager = LoRAManager()
            logger.info("โœ… ์ „์—ญ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์™„๋ฃŒ")
            return manager
        logger.warning("โš ๏ธ LoRA ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ LoRA ๊ด€๋ฆฌ์ž๋ฅผ ์ƒ์„ฑํ•˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
        return None
    except Exception as e:
        logger.error(f"โŒ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return None


lora_manager = _build_global_lora_manager()
518
+
519
def get_lora_manager() -> Optional[LoRAManager]:
    """Accessor for the module-level LoRA manager.

    Returns:
        The shared LoRAManager instance, or None when the LoRA libraries were
        unavailable or construction failed at import time.
    """
    return lora_manager
refresh_tokenizer.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
# model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
# save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
refresh_tokenizer_kanana.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
# model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
# save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+ # from PIL import Image
68
+ # import torch
69
+ # from transformers import AutoModelForVision2Seq, AutoProcessor
70
+
71
+ # MODEL = "kakaocorp/kanana-1.5-v-3b-instruct"
72
+
73
+ # # Load the model on the available device(s)
74
+ # model = AutoModelForVision2Seq.from_pretrained(
75
+ # MODEL,
76
+ # torch_dtype=torch.bfloat16,
77
+ # device_map="auto",
78
+ # trust_remote_code=True
79
+ # )
80
+ # model.eval()
81
+
82
+ # # Load processor
83
+ # processor = AutoProcessor.from_pretrained(MODEL, trust_remote_code=True)
84
+
85
+ # # Prepare input batch
86
+ # batch = []
87
+ # for _ in range(1): # dummy loop to demonstrate batch processing
88
+ # image_files = [
89
+ # "./examples/waybill.png"
90
+ # ]
91
+
92
+ # sample = {
93
+ # "image": [Image.open(image_file_path).convert("RGB") for image_file_path in image_files],
94
+ # "conv": [
95
+ # {"role": "system", "content": "The following is a conversation between a curious human and AI assistant."},
96
+ # {"role": "user", "content": " ".join(["<image>"] * len(image_files))},
97
+ # {"role": "user", "content": "์‚ฌ์ง„์—์„œ ๋ณด๋‚ด๋Š” ์‚ฌ๋žŒ๊ณผ ๋ฐ›๋Š” ์‚ฌ๋žŒ ์ •๋ณด๋ฅผ json ํ˜•ํƒœ๋กœ ์ •๋ฆฌํ•ด์ค˜."},
98
+ # ]
99
+ # }
100
+
101
+ # batch.append(sample)
102
+
103
+ # inputs = processor.batch_encode_collate(
104
+ # batch, padding_side="left", add_generation_prompt=True, max_length=8192
105
+ # )
106
+ # inputs = {k: v.to(model.device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
107
+
108
+ # # Set the generation config
109
+ # gen_kwargs = {
110
+ # "max_new_tokens": 2048,
111
+ # "temperature": 0,
112
+ # "top_p": 1.0,
113
+ # "num_beams": 1,
114
+ # "do_sample": False,
115
+ # }
116
+
117
+ # # Generate text
118
+ # gens = model.generate(
119
+ # **inputs,
120
+ # **gen_kwargs,
121
+ # )
122
+ # text_outputs = processor.tokenizer.batch_decode(gens, skip_special_tokens=True)
123
+ # print(text_outputs) # ['```json\n{\n "๋ณด๋‚ด๋Š”๋ถ„": {\n "์„ฑ๋ช…": "์นด์นด์˜ค",\n "์ฃผ์†Œ": "๊ฒฝ๊ธฐ๋„ ์„ฑ๋‚จ์‹œ ํŒ๊ต์—ญ๋กœ 166"\n },\n "๋ฐ›๋Š”๋ถ„": {\n "์„ฑ๋ช…": "์นด๋‚˜๋‚˜",\n "์ฃผ์†Œ": "์ œ์ฃผ๋„ ์ œ์ฃผ์‹œ ์ฒจ๋‹จ๋กœ 242"\n }\n}\n```']
124
+
refresh_tokenizer_polyglot.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer
import os

# Hugging Face Hub id of the model whose tokenizer is re-downloaded.
model_name = "EleutherAI/polyglot-ko-1.3b"
# model_name = "EleutherAI/polyglot-ko-5.8b"
# model_name = "kakaocorp/kanana-1.5-v-3b-instruct"

# Local directory whose tokenizer files will be overwritten.
save_directory = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
# save_directory = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
# save_directory = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"

print(f"'{model_name}' ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ๋‹ค์šด๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค...")

try:
    # Download the tokenizer from the Hub (uses cached credentials if any).
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("โœ… ํ† ํฌ๋‚˜์ด์ € ๋‹ค์šด๋กœ๋“œ ์„ฑ๊ณต!")

    # FIX: exist_ok=True removes the check-then-create race; the creation
    # message is still only printed when the directory was actually missing.
    if not os.path.exists(save_directory):
        os.makedirs(save_directory, exist_ok=True)
        print(f"'{save_directory}' ํด๋”๋ฅผ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค.")

    # Re-save in the current library's format, overwriting stale files.
    tokenizer.save_pretrained(save_directory)
    print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{save_directory}' ๊ฒฝ๋กœ์— ์„ฑ๊ณต์ ์œผ๋กœ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
    print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")

except Exception as e:
    print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    print("๋ชจ๋ธ ์ด๋ฆ„์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์ƒํƒœ๋ฅผ ์ ๊ฒ€ํ•ด ์ฃผ์„ธ์š”.")
33
+
34
+
35
+ # from transformers import AutoTokenizer
36
+ # import os
37
+
38
+ # # 1. ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ๊ณ  ๋‹ค์‹œ ์ €์žฅํ•  ๋กœ์ปฌ ๋ชจ๋ธ ๊ฒฝ๋กœ
39
+ # # ์ด ๊ฒฝ๋กœ์— tokenizer.json ํŒŒ์ผ์ด ์ด๋ฏธ ์กด์žฌํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค.
40
+ # model_and_tokenizer_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
41
+
42
+ # print(f"๋กœ์ปฌ ๊ฒฝ๋กœ์—์„œ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์ฝ์–ด์˜ต๋‹ˆ๋‹ค: '{model_and_tokenizer_path}'")
43
+
44
+ # try:
45
+ # # ์ธํ„ฐ๋„ท ์—ฐ๊ฒฐ ์—†์ด ๋กœ์ปฌ ํŒŒ์ผ๋งŒ์œผ๋กœ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
46
+ # tokenizer = AutoTokenizer.from_pretrained(
47
+ # model_and_tokenizer_path,
48
+ # local_files_only=True # ์ด ์˜ต์…˜์ด ํ•ต์‹ฌ์ž…๋‹ˆ๋‹ค!
49
+ # )
50
+ # print("โœ… ๋กœ์ปฌ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ ์„ฑ๊ณต!")
51
+
52
+ # # ํ˜„์žฌ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋ฒ„์ „์— ๋งž๋Š” ํ˜•์‹์œผ๋กœ ๋™์ผํ•œ ๊ฒฝ๋กœ์— ๋‹ค์‹œ ์ €์žฅ (๋ฎ์–ด์“ฐ๊ธฐ)
53
+ # tokenizer.save_pretrained(model_and_tokenizer_path)
54
+ # print(f"โœ… ํ† ํฌ๋‚˜์ด์ €๋ฅผ '{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ์ƒˆ ํ˜•์‹์œผ๋กœ ๋‹ค์‹œ ์ €์žฅํ–ˆ์Šต๋‹ˆ๋‹ค.")
55
+ # print("\n์ด์ œ Lily LLM ์„œ๋ฒ„๋ฅผ ๋‹ค์‹œ ์‹คํ–‰ํ•ด ์ฃผ์„ธ์š”.")
56
+
57
+ # except Exception as e:
58
+ # print(f"\nโŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
59
+ # print(f"'{model_and_tokenizer_path}' ๊ฒฝ๋กœ์— ํ† ํฌ๋‚˜์ด์ € ํŒŒ์ผ๋“ค์ด ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•ด ์ฃผ์„ธ์š”.")
60
+
requirements.txt CHANGED
@@ -1,73 +1,53 @@
1
- # Core FastAPI
2
- fastapi>=0.104.0
3
- uvicorn[standard]>=0.24.0
4
- python-multipart>=0.0.6
5
- pydantic>=2.0.0
6
- pydantic-settings>=2.0.0
7
-
8
- # Machine Learning
9
- torch>=2.0.0
10
- transformers>=4.35.0
11
- accelerate>=0.24.0
12
- tokenizers>=0.15.0
13
- safetensors>=0.4.0
14
-
15
- # Image Processing
16
- Pillow>=10.0.0
17
- opencv-python>=4.8.0
18
-
19
- # Vector Database
20
- faiss-cpu>=1.7.4
21
- sentence-transformers>=2.2.2
22
-
23
- # LangChain
24
- langchain>=0.1.0
25
- langchain-community>=0.0.10
26
-
27
- # Text Processing
28
- nltk>=3.8.1
29
-
30
- # HTTP Requests
31
- requests>=2.31.0
32
- aiohttp>=3.9.0
33
-
34
- # Utilities
35
- python-dotenv>=1.0.0
36
- numpy>=1.24.0
37
- pandas>=2.1.0
38
-
39
- # PDF Processing
40
- PyMuPDF>=1.23.0
41
-
42
- # Document Processing
43
- python-docx>=1.1.0
44
- python-pptx>=1.0.0
45
-
46
- # OCR
47
- easyocr>=1.7.0
48
- pytesseract>=0.3.10
49
-
50
- # Database
51
- sqlalchemy>=2.0.0
52
-
53
- # Task Queue
54
- celery>=5.3.0
55
- redis>=5.0.0
56
-
57
- # Security
58
- python-jose[cryptography]>=3.3.0
59
- passlib[bcrypt]>=1.7.4
60
-
61
- # WebSocket
62
- websockets>=12.0
63
-
64
- # Performance Monitoring
65
- psutil>=5.9.0
66
-
67
- # Logging
68
- python-json-logger>=3.0.0
69
-
70
- # additional
71
- PyJWT==2.8.0
72
- einops==0.8.1
73
- timm==1.0.19
 
1
+ # requirements.txt (Final Version for Kanana)
2
+
3
+ # Core ML/DL Stack (Pinned for Kanana Model Compatibility)
4
+ numpy==1.26.4
5
+ torch==2.3.1
6
+ torchvision==0.18.1
7
+ # transformers==4.29.2
8
+ # tokenizers==0.13.3
9
+ transformers
10
+ tokenizers
11
+ peft==0.10.0
12
+ accelerate==0.30.1
13
+ bitsandbytes
14
+ safetensors
15
+
16
+ # OCR and Image Processing
17
+ easyocr
18
+ opencv-python-headless
19
+
20
+ # Web Framework & Parsers
21
+ fastapi
22
+ uvicorn[standard]
23
+ markdown-it-py==3.0.0
24
+
25
+ # Other dependencies
26
+ python-multipart
27
+ pydantic
28
+ pydantic-settings
29
+ Pillow
30
+ faiss-cpu
31
+ sentence-transformers
32
+ langchain
33
+ langchain-community
34
+ nltk
35
+ requests
36
+ aiohttp
37
+ python-dotenv
38
+ pandas
39
+ PyMuPDF
40
+ python-docx
41
+ python-pptx
42
+ pytesseract
43
+ sqlalchemy
44
+ celery
45
+ redis
46
+ python-jose[cryptography]
47
+ passlib[bcrypt]
48
+ websockets
49
+ psutil
50
+ python-json-logger
51
+ PyJWT
52
+ einops
53
+ timm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements_250819_0958txt ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI
2
+ fastapi>=0.104.0
3
+ uvicorn[standard]>=0.24.0
4
+ python-multipart>=0.0.6
5
+ pydantic>=2.0.0
6
+ pydantic-settings>=2.0.0
7
+
8
+ # Machine Learning
9
+ torch>=2.0.0
10
+ transformers>=4.41.2
11
+ accelerate>=0.30.1
12
+ tokenizers>=0.15.0
13
+ safetensors>=0.4.0
14
+ trl>=0.8.6
15
+
16
+ # LoRA/QLoRA Support
17
+ peft>=0.10.0
18
+ bitsandbytes>=0.41.0
19
+
20
+ # Image Processing
21
+ Pillow>=10.0.0
22
+ opencv-python>=4.8.0
23
+
24
+ # Vector Database
25
+ faiss-cpu>=1.7.4
26
+ sentence-transformers>=2.2.2
27
+
28
+ # LangChain
29
+ langchain>=0.1.0
30
+ langchain-community>=0.0.10
31
+
32
+ # Text Processing
33
+ nltk>=3.8.1
34
+
35
+ # HTTP Requests
36
+ requests>=2.31.0
37
+ aiohttp>=3.9.0
38
+
39
+ # Utilities
40
+ python-dotenv>=1.0.0
41
+ numpy>=1.24.0
42
+ pandas>=2.1.0
43
+
44
+ # PDF Processing
45
+ PyMuPDF>=1.23.0
46
+
47
+ # Document Processing
48
+ python-docx>=1.1.0
49
+ python-pptx>=1.0.0
50
+
51
+ # OCR
52
+ easyocr>=1.7.0
53
+ pytesseract>=0.3.10
54
+
55
+ # Database
56
+ sqlalchemy>=2.0.0
57
+
58
+ # Task Queue
59
+ celery>=5.3.0
60
+ redis>=5.0.0
61
+
62
+ # Security
63
+ python-jose[cryptography]>=3.3.0
64
+ passlib[bcrypt]>=1.7.4
65
+
66
+ # WebSocket
67
+ websockets>=12.0
68
+
69
+ # Performance Monitoring
70
+ psutil>=5.9.0
71
+
72
+ # Logging
73
+ python-json-logger>=3.0.0
74
+
75
+ # additional
76
+ PyJWT==2.8.0
77
+ einops==0.8.1
78
+ timm==1.0.19
requirements_250819_1025.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements.txt
2
+
3
+ # Core ML/DL Stack (Pinned for Stability)
4
+ numpy==1.26.4
5
+ torch==2.3.1
6
+ torchvision==0.18.1
7
+ transformers==4.41.2
8
+ peft==0.10.0
9
+ trl==0.8.6
10
+ accelerate==0.30.1
11
+ bitsandbytes
12
+ safetensors
13
+ tokenizers
14
+
15
+ # OCR and Image Processing
16
+ easyocr
17
+ opencv-python-headless
18
+
19
+ # Web Framework
20
+ fastapi
21
+ uvicorn[standard]
22
+
23
+ # Parsers and Utilities (Pinned to fix conflicts)
24
+ markdown-it-py==3.0.0
25
+ mdit-py-plugins
26
+ myst-parser
27
+
28
+ # Other dependencies from your original file
29
+ python-multipart
30
+ pydantic
31
+ pydantic-settings
32
+ Pillow
33
+ faiss-cpu
34
+ sentence-transformers
35
+ langchain
36
+ langchain-community
37
+ nltk
38
+ requests
39
+ aiohttp
40
+ python-dotenv
41
+ pandas
42
+ PyMuPDF
43
+ python-docx
44
+ python-pptx
45
+ pytesseract
46
+ sqlalchemy
47
+ celery
48
+ redis
49
+ python-jose[cryptography]
50
+ passlib[bcrypt]
51
+ websockets
52
+ psutil
53
+ python-json-logger
54
+ PyJWT
55
+ einops
56
+ timm
requirements_full_250819_0721.txt ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.9.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.14
5
+ aiosignal==1.4.0
6
+ alabaster==1.0.0
7
+ albucore==0.0.24
8
+ albumentations==1.4.3
9
+ amqp==5.3.1
10
+ annotated-types==0.7.0
11
+ anyio==4.9.0
12
+ argon2-cffi==25.1.0
13
+ argon2-cffi-bindings==21.2.0
14
+ arrow==1.3.0
15
+ asgiref==3.9.1
16
+ asttokens==3.0.0
17
+ async-lru==2.0.5
18
+ asyncio==3.4.3
19
+ attrs==25.3.0
20
+ babel==2.17.0
21
+ backoff==2.2.1
22
+ bcrypt==4.3.0
23
+ beautifulsoup4==4.13.4
24
+ billiard==4.2.1
25
+ bitsandbytes==0.46.1
26
+ bleach==6.2.0
27
+ Brotli==1.1.0
28
+ build==1.3.0
29
+ cachetools==5.5.2
30
+ celery==5.5.3
31
+ certifi==2025.8.3
32
+ cffi==1.17.1
33
+ chardet==5.2.0
34
+ charset-normalizer==3.4.2
35
+ chroma-hnswlib==0.7.3
36
+ chromadb==0.4.24
37
+ click==8.2.1
38
+ click-didyoumean==0.3.1
39
+ click-plugins==1.1.1.2
40
+ click-repl==0.3.0
41
+ colorama==0.4.6
42
+ coloredlogs==15.0.1
43
+ comm==0.2.3
44
+ contourpy==1.3.3
45
+ cryptography==45.0.5
46
+ cycler==0.12.1
47
+ dataclasses-json==0.6.7
48
+ datasets==4.0.0
49
+ debugpy==1.8.15
50
+ decorator==5.2.1
51
+ defusedxml==0.7.1
52
+ dill==0.3.8
53
+ distro==1.9.0
54
+ docutils==0.21.2
55
+ dotenv==0.9.9
56
+ durationpy==0.10
57
+ easyocr==1.7.2
58
+ einops==0.8.1
59
+ einx==0.3.0
60
+ emoji==2.14.1
61
+ entmax==1.3
62
+ et_xmlfile==2.0.0
63
+ executing==2.2.0
64
+ faiss-cpu==1.8.0
65
+ fastapi==0.116.1
66
+ fastjsonschema==2.21.1
67
+ ffmpy==0.6.1
68
+ filelock==3.18.0
69
+ filetype==1.2.0
70
+ flatbuffers==25.2.10
71
+ fonttools==4.59.0
72
+ fqdn==1.5.1
73
+ frozendict==2.4.6
74
+ frozenlist==1.7.0
75
+ fsspec==2025.7.0
76
+ git-filter-repo==2.47.0
77
+ google-auth==2.40.3
78
+ googleapis-common-protos==1.70.0
79
+ gradio==5.41.0
80
+ gradio_client==1.11.0
81
+ greenlet==3.2.3
82
+ groovy==0.1.2
83
+ grpcio==1.74.0
84
+ h11==0.16.0
85
+ h2==4.2.0
86
+ hpack==4.1.0
87
+ httpcore==1.0.9
88
+ httptools==0.6.4
89
+ httpx==0.28.1
90
+ httpx-sse==0.4.1
91
+ huggingface-hub==0.34.3
92
+ humanfriendly==10.0
93
+ hyperframe==6.1.0
94
+ idna==3.10
95
+ imageio==2.37.0
96
+ imagesize==1.4.1
97
+ importlib_metadata==8.7.0
98
+ importlib_resources==6.5.2
99
+ ipykernel==6.30.0
100
+ ipython==9.4.0
101
+ ipython_pygments_lexers==1.1.1
102
+ ipywidgets==8.1.7
103
+ isoduration==20.11.0
104
+ jedi==0.19.2
105
+ Jinja2==3.1.6
106
+ joblib==1.5.1
107
+ json5==0.12.0
108
+ jsonpatch==1.33
109
+ jsonpointer==3.0.0
110
+ jsonschema==4.25.0
111
+ jsonschema-specifications==2025.4.1
112
+ jupyter==1.1.1
113
+ jupyter-console==6.6.3
114
+ jupyter-events==0.12.0
115
+ jupyter-lsp==2.2.6
116
+ jupyter_client==8.6.3
117
+ jupyter_core==5.8.1
118
+ jupyter_server==2.16.0
119
+ jupyter_server_terminals==0.5.3
120
+ jupyterlab==4.4.5
121
+ jupyterlab_pygments==0.3.0
122
+ jupyterlab_server==2.27.3
123
+ jupyterlab_widgets==3.0.15
124
+ kiwisolver==1.4.8
125
+ kombu==5.5.4
126
+ kubernetes==33.1.0
127
+ langchain==0.3.27
128
+ langchain-community==0.3.27
129
+ langchain-core==0.3.72
130
+ langchain-huggingface==0.3.1
131
+ langchain-text-splitters==0.3.9
132
+ langdetect==1.0.9
133
+ langsmith==0.4.10
134
+ lark==1.2.2
135
+ lazy_loader==0.4
136
+ loguru==0.7.3
137
+ lxml==6.0.0
138
+ markdown-it-py==3.0.0
139
+ MarkupSafe==3.0.2
140
+ marshmallow==3.26.1
141
+ matplotlib==3.10.3
142
+ matplotlib-inline==0.1.7
143
+ mdit-py-plugins==0.4.2
144
+ mdurl==0.1.2
145
+ mistune==3.1.3
146
+ mmh3==5.2.0
147
+ mpmath==1.3.0
148
+ multidict==6.6.3
149
+ multiprocess==0.70.16
150
+ munch==4.0.0
151
+ mypy_extensions==1.1.0
152
+ myst-parser==4.0.1
153
+ nbclient==0.10.2
154
+ nbconvert==7.16.6
155
+ nbformat==5.10.4
156
+ nest-asyncio==1.6.0
157
+ networkx==3.5
158
+ ninja==1.11.1.4
159
+ nltk==3.8.1
160
+ notebook==7.4.4
161
+ notebook_shim==0.2.4
162
+ numpy==2.2.6
163
+ oauthlib==3.3.1
164
+ onnxruntime==1.22.1
165
+ opencv-python-headless==4.12.0.88
166
+ openpyxl==3.1.5
167
+ opentelemetry-api==1.36.0
168
+ opentelemetry-exporter-otlp-proto-common==1.36.0
169
+ opentelemetry-exporter-otlp-proto-grpc==1.36.0
170
+ opentelemetry-instrumentation==0.57b0
171
+ opentelemetry-instrumentation-asgi==0.57b0
172
+ opentelemetry-instrumentation-fastapi==0.57b0
173
+ opentelemetry-proto==1.36.0
174
+ opentelemetry-sdk==1.36.0
175
+ opentelemetry-semantic-conventions==0.57b0
176
+ opentelemetry-util-http==0.57b0
177
+ orjson==3.11.1
178
+ overrides==7.7.0
179
+ packaging==25.0
180
+ pandas==2.3.1
181
+ pandocfilters==1.5.1
182
+ parso==0.8.4
183
+ peft==0.8.0
184
+ pillow==11.3.0
185
+ platformdirs==4.3.8
186
+ portalocker==3.2.0
187
+ posthog==6.3.3
188
+ prometheus_client==0.22.1
189
+ prompt_toolkit==3.0.51
190
+ propcache==0.3.2
191
+ protobuf==6.31.1
192
+ psutil==7.0.0
193
+ pulsar-client==3.8.0
194
+ pure_eval==0.2.3
195
+ pyarrow==21.0.0
196
+ pyasn1==0.6.1
197
+ pyasn1_modules==0.4.2
198
+ pyclipper==1.3.0.post6
199
+ pycparser==2.22
200
+ pydantic==2.11.7
201
+ pydantic-settings==2.10.1
202
+ pydantic_core==2.33.2
203
+ pydub==0.25.1
204
+ Pygments==2.19.2
205
+ PyJWT==2.8.0
206
+ PyMuPDF==1.23.8
207
+ PyMuPDFb==1.23.7
208
+ pyparsing==3.2.3
209
+ pypdf==4.1.0
210
+ PyPika==0.48.9
211
+ pyproject_hooks==1.2.0
212
+ pyreadline3==3.5.4
213
+ pytesseract==0.3.13
214
+ python-bidi==0.6.6
215
+ python-dateutil==2.9.0.post0
216
+ python-docx==1.1.2
217
+ python-dotenv==1.1.1
218
+ python-iso639==2025.2.18
219
+ python-json-logger==3.3.0
220
+ python-magic==0.4.27
221
+ python-multipart==0.0.20
222
+ python-pptx==1.0.2
223
+ pytz==2025.2
224
+ pywin32==311
225
+ pywinpty==2.0.15
226
+ PyYAML==6.0.2
227
+ pyzmq==27.0.0
228
+ qdrant-client==1.15.1
229
+ RapidFuzz==3.13.0
230
+ redis==6.2.0
231
+ referencing==0.36.2
232
+ regex==2025.7.34
233
+ requests==2.32.4
234
+ requests-oauthlib==2.0.0
235
+ requests-toolbelt==1.0.0
236
+ rfc3339-validator==0.1.4
237
+ rfc3986-validator==0.1.1
238
+ rfc3987-syntax==1.1.0
239
+ rich==14.1.0
240
+ roman-numerals-py==3.1.0
241
+ rpds-py==0.26.0
242
+ rsa==4.9.1
243
+ ruff==0.12.7
244
+ safehttpx==0.1.6
245
+ safetensors==0.5.3
246
+ scikit-image==0.25.2
247
+ scikit-learn==1.7.1
248
+ scipy==1.16.1
249
+ seaborn==0.13.2
250
+ semantic-version==2.10.0
251
+ Send2Trash==1.8.3
252
+ sentence-transformers==3.0.0
253
+ sentencepiece==0.2.0
254
+ shapely==2.1.1
255
+ shellingham==1.5.4
256
+ simsimd==6.5.0
257
+ six==1.17.0
258
+ sniffio==1.3.1
259
+ snowballstemmer==3.0.1
260
+ soupsieve==2.7
261
+ Sphinx==8.2.3
262
+ sphinxcontrib-applehelp==2.0.0
263
+ sphinxcontrib-devhelp==2.0.0
264
+ sphinxcontrib-htmlhelp==2.1.0
265
+ sphinxcontrib-jsmath==1.0.1
266
+ sphinxcontrib-qthelp==2.0.0
267
+ sphinxcontrib-serializinghtml==2.0.0
268
+ SQLAlchemy==2.0.42
269
+ stack-data==0.6.3
270
+ starlette==0.47.2
271
+ stringzilla==3.12.5
272
+ sympy==1.14.0
273
+ tabulate==0.9.0
274
+ tenacity==8.5.0
275
+ terminado==0.18.1
276
+ threadpoolctl==3.6.0
277
+ tifffile==2025.6.11
278
+ timm==1.0.19
279
+ tinycss2==1.4.0
280
+ tokenizers==0.21.4
281
+ tomlkit==0.13.3
282
+ torch==2.7.1
283
+ torchvision==0.22.1
284
+ tornado==6.5.1
285
+ tqdm==4.67.1
286
+ traitlets==5.14.3
287
+ transformers==4.54.1
288
+ transformers_modules==0.4.0
289
+ trl==0.19.1
290
+ typer==0.16.0
291
+ types-python-dateutil==2.9.0.20250708
292
+ typing-inspect==0.9.0
293
+ typing-inspection==0.4.1
294
+ typing_extensions==4.14.1
295
+ tzdata==2025.2
296
+ unstructured==0.12.4
297
+ unstructured-client==0.42.0
298
+ uri-template==1.3.0
299
+ urllib3==2.5.0
300
+ uvicorn==0.35.0
301
+ vine==5.1.0
302
+ watchfiles==1.1.0
303
+ wcwidth==0.2.13
304
+ webcolors==24.11.1
305
+ webencodings==0.5.1
306
+ websocket-client==1.8.0
307
+ websockets==15.0.1
308
+ widgetsnbextension==4.0.14
309
+ win32_setctime==1.2.0
310
+ wrapt==1.17.2
311
+ x-transformers==0.15.0
312
+ xlsxwriter==3.2.5
313
+ xxhash==3.5.0
314
+ yarl==1.20.1
315
+ zipp==3.23.0
316
+ zstandard==0.23.0
requirements_full_250819_2030.txt ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.30.1
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.12.15
4
+ aiosignal==1.4.0
5
+ amqp==5.3.1
6
+ annotated-types==0.7.0
7
+ anyio==4.10.0
8
+ attrs==25.3.0
9
+ bcrypt==4.3.0
10
+ billiard==4.2.1
11
+ bitsandbytes==0.47.0
12
+ celery==5.5.3
13
+ certifi==2025.8.3
14
+ cffi==1.17.1
15
+ charset-normalizer==3.4.3
16
+ click==8.2.1
17
+ click-didyoumean==0.3.1
18
+ click-plugins==1.1.1.2
19
+ click-repl==0.3.0
20
+ colorama==0.4.6
21
+ cryptography==45.0.6
22
+ dataclasses-json==0.6.7
23
+ easyocr==1.7.2
24
+ ecdsa==0.19.1
25
+ einops==0.8.1
26
+ faiss-cpu==1.12.0
27
+ fastapi==0.116.1
28
+ filelock==3.19.1
29
+ frozenlist==1.7.0
30
+ fsspec==2025.7.0
31
+ greenlet==3.2.4
32
+ h11==0.16.0
33
+ httpcore==1.0.9
34
+ httptools==0.6.4
35
+ httpx==0.28.1
36
+ httpx-sse==0.4.1
37
+ huggingface-hub==0.34.4
38
+ idna==3.10
39
+ imageio==2.37.0
40
+ intel-openmp==2021.4.0
41
+ Jinja2==3.1.6
42
+ joblib==1.5.1
43
+ jsonpatch==1.33
44
+ jsonpointer==3.0.0
45
+ kombu==5.5.4
46
+ langchain==0.3.27
47
+ langchain-community==0.3.27
48
+ langchain-core==0.3.74
49
+ langchain-text-splitters==0.3.9
50
+ langsmith==0.4.14
51
+ lazy_loader==0.4
52
+ lxml==6.0.0
53
+ markdown-it-py==3.0.0
54
+ MarkupSafe==3.0.2
55
+ marshmallow==3.26.1
56
+ mdurl==0.1.2
57
+ mkl==2021.4.0
58
+ mpmath==1.3.0
59
+ multidict==6.6.4
60
+ mypy_extensions==1.1.0
61
+ networkx==3.5
62
+ ninja==1.13.0
63
+ nltk==3.9.1
64
+ numpy==1.26.4
65
+ opencv-python-headless==4.11.0.86
66
+ orjson==3.11.2
67
+ packaging==25.0
68
+ pandas==2.3.1
69
+ passlib==1.7.4
70
+ peft==0.10.0
71
+ pillow==11.3.0
72
+ prompt_toolkit==3.0.51
73
+ propcache==0.3.2
74
+ psutil==7.0.0
75
+ pyasn1==0.6.1
76
+ pyclipper==1.3.0.post6
77
+ pycparser==2.22
78
+ pydantic==2.11.7
79
+ pydantic-settings==2.10.1
80
+ pydantic_core==2.33.2
81
+ PyJWT==2.10.1
82
+ PyMuPDF==1.26.3
83
+ pytesseract==0.3.13
84
+ python-bidi==0.6.6
85
+ python-dateutil==2.9.0.post0
86
+ python-docx==1.2.0
87
+ python-dotenv==1.1.1
88
+ python-jose==3.5.0
89
+ python-json-logger==3.3.0
90
+ python-multipart==0.0.20
91
+ python-pptx==1.0.2
92
+ pytz==2025.2
93
+ PyYAML==6.0.2
94
+ redis==6.4.0
95
+ regex==2025.7.34
96
+ requests==2.32.5
97
+ requests-toolbelt==1.0.0
98
+ rsa==4.9.1
99
+ safetensors==0.6.2
100
+ scikit-image==0.25.2
101
+ scikit-learn==1.7.1
102
+ scipy==1.16.1
103
+ sentence-transformers==2.2.2
104
+ sentencepiece==0.2.1
105
+ shapely==2.1.1
106
+ six==1.17.0
107
+ sniffio==1.3.1
108
+ SQLAlchemy==2.0.43
109
+ starlette==0.47.2
110
+ sympy==1.14.0
111
+ tbb==2021.13.1
112
+ tenacity==9.1.2
113
+ threadpoolctl==3.6.0
114
+ tifffile==2025.6.11
115
+ timm==1.0.19
116
+ tokenizers==0.21.4
117
+ torch==2.3.1
118
+ torchvision==0.18.1
119
+ tqdm==4.67.1
120
+ transformers==4.55.2
121
+ typing-inspect==0.9.0
122
+ typing-inspection==0.4.1
123
+ typing_extensions==4.14.1
124
+ tzdata==2025.2
125
+ urllib3==2.5.0
126
+ uvicorn==0.35.0
127
+ vine==5.1.0
128
+ watchfiles==1.1.0
129
+ wcwidth==0.2.13
130
+ websockets==15.0.1
131
+ xlsxwriter==3.2.5
132
+ yarl==1.20.1
133
+ zstandard==0.24.0
test_context_lora.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๋ฐ LoRA ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+
6
+ import requests
7
+ import json
8
+ import time
9
+ from typing import Dict, Any
10
+
11
+ class LilyLLMTester:
12
+ """Lily LLM API ํ…Œ์ŠคํŠธ ํด๋ž˜์Šค"""
13
+
14
+ def __init__(self, base_url: str = "http://localhost:8001"):
15
+ self.base_url = base_url
16
+ self.session = requests.Session()
17
+
18
+ def test_context_management(self):
19
+ """์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
20
+ print("๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
21
+ print("=" * 50)
22
+
23
+ # 1. ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •
24
+ print("1. ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •...")
25
+ response = self.session.post(
26
+ f"{self.base_url}/context/set-system-prompt",
27
+ data={"prompt": "๋‹น์‹ ์€ ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”ํ•˜๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ๋ชจ๋“  ์‘๋‹ต์€ ํ•œ๊ตญ์–ด๋กœ ํ•ด์ฃผ์„ธ์š”."}
28
+ )
29
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
30
+
31
+ # 2. ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
32
+ print("\n2. ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€...")
33
+ response = self.session.post(
34
+ f"{self.base_url}/context/add-message",
35
+ data={
36
+ "role": "user",
37
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜ ๋‚ ์”จ๊ฐ€ ์–ด๋•Œ์š”?",
38
+ "metadata": json.dumps({"session_id": "test_session_1"})
39
+ }
40
+ )
41
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
42
+
43
+ # 3. ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€
44
+ print("\n3. ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์ถ”๊ฐ€...")
45
+ response = self.session.post(
46
+ f"{self.base_url}/context/add-message",
47
+ data={
48
+ "role": "assistant",
49
+ "content": "์•ˆ๋…•ํ•˜์„ธ์š”! ์˜ค๋Š˜์€ ๋ง‘๊ณ  ํ™”์ฐฝํ•œ ๋‚ ์”จ์ž…๋‹ˆ๋‹ค. ๊ธฐ์˜จ์€ 20๋„ ์ •๋„๋กœ ์พŒ์ ํ•ฉ๋‹ˆ๋‹ค.",
50
+ "metadata": json.dumps({"session_id": "test_session_1"})
51
+ }
52
+ )
53
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
54
+
55
+ # 4. ์ปจํ…์ŠคํŠธ ์กฐํšŒ
56
+ print("\n4. ์ปจํ…์ŠคํŠธ ์กฐํšŒ...")
57
+ response = self.session.get(f"{self.base_url}/context/get")
58
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
59
+
60
+ # 5. ์ปจํ…์ŠคํŠธ ์š”์•ฝ
61
+ print("\n5. ์ปจํ…์ŠคํŠธ ์š”์•ฝ...")
62
+ response = self.session.get(f"{self.base_url}/context/summary")
63
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
64
+
65
+ # 6. ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰
66
+ print("\n6. ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰...")
67
+ response = self.session.get(f"{self.base_url}/context/search?query=๋‚ ์”จ")
68
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
69
+
70
+ print("\nโœ… ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
71
+
72
+ def test_lora_management(self):
73
+ """LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
74
+ print("\n๐Ÿ”ง LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
75
+ print("=" * 50)
76
+
77
+ # 1. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก
78
+ print("1. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก...")
79
+ response = self.session.get(f"{self.base_url}/lora/adapters")
80
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
81
+
82
+ # 2. LoRA ํ†ต๊ณ„ (์–ด๋Œ‘ํ„ฐ๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ)
83
+ print("\n2. LoRA ํ†ต๊ณ„...")
84
+ response = self.session.get(f"{self.base_url}/lora/stats")
85
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
86
+
87
+ # 3. LoRA ์„ค์ • ์ƒ์„ฑ
88
+ print("\n3. LoRA ์„ค์ • ์ƒ์„ฑ...")
89
+ response = self.session.post(
90
+ f"{self.base_url}/lora/create-config",
91
+ data={
92
+ "r": 16,
93
+ "lora_alpha": 32,
94
+ "target_modules": "q_proj,v_proj,k_proj,o_proj",
95
+ "lora_dropout": 0.1,
96
+ "bias": "none",
97
+ "task_type": "CAUSAL_LM"
98
+ }
99
+ )
100
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
101
+
102
+ print("\nโœ… LoRA ๊ด€๋ฆฌ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
103
+
104
+ def test_integrated_generation(self):
105
+ """ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ (์ปจํ…์ŠคํŠธ ํฌํ•จ)"""
106
+ print("\n๐Ÿ”ง ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
107
+ print("=" * 50)
108
+
109
+ # 1. ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ
110
+ print("1. ์ปจํ…์ŠคํŠธ๋ฅผ ์‚ฌ์šฉํ•œ ํ…์ŠคํŠธ ์ƒ์„ฑ...")
111
+ response = self.session.post(
112
+ f"{self.base_url}/generate",
113
+ data={
114
+ "prompt": "๊ทธ๋Ÿผ ๋‚ด์ผ์€ ์–ด๋–ค ๋‚ ์”จ๊ฐ€ ๋ ๊นŒ์š”?",
115
+ "use_context": "true",
116
+ "session_id": "test_session_1"
117
+ }
118
+ )
119
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
120
+
121
+ # 2. ์ปจํ…์ŠคํŠธ ์—†์ด ํ…์ŠคํŠธ ์ƒ์„ฑ
122
+ print("\n2. ์ปจํ…์ŠคํŠธ ์—†์ด ํ…์ŠคํŠธ ์ƒ์„ฑ...")
123
+ response = self.session.post(
124
+ f"{self.base_url}/generate",
125
+ data={
126
+ "prompt": "๊ฐ„๋‹จํ•œ ์ธ์‚ฌ๋ง์„ ํ•ด์ฃผ์„ธ์š”.",
127
+ "use_context": "false"
128
+ }
129
+ )
130
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
131
+
132
+ print("\nโœ… ํ†ตํ•ฉ ์ƒ์„ฑ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
133
+
134
+ def test_context_export_import(self):
135
+ """์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ"""
136
+ print("\n๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
137
+ print("=" * 50)
138
+
139
+ # 1. ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ
140
+ print("1. ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ...")
141
+ response = self.session.post(
142
+ f"{self.base_url}/context/export",
143
+ data={"file_path": "test_context_export.json"}
144
+ )
145
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
146
+
147
+ # 2. ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”
148
+ print("\n2. ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”...")
149
+ response = self.session.post(f"{self.base_url}/context/clear")
150
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
151
+
152
+ # 3. ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
153
+ print("\n3. ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ...")
154
+ response = self.session.post(
155
+ f"{self.base_url}/context/import",
156
+ data={"file_path": "test_context_export.json"}
157
+ )
158
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
159
+
160
+ print("\nโœ… ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
161
+
162
+ def test_rag_functionality(self):
163
+ """RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ"""
164
+ print("\n๐Ÿ”ง RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
165
+ print("=" * 50)
166
+
167
+ # 1. ํ—ฌ์Šค ์ฒดํฌ
168
+ print("1. ์„œ๋ฒ„ ์ƒํƒœ ํ™•์ธ...")
169
+ response = self.session.get(f"{self.base_url}/health")
170
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
171
+
172
+ # 2. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก
173
+ print("\n2. ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ ๋ชฉ๋ก...")
174
+ response = self.session.get(f"{self.base_url}/models")
175
+ print(f" ๊ฒฐ๊ณผ: {response.json()}")
176
+
177
+ print("\nโœ… RAG ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ ์™„๋ฃŒ")
178
+
179
+ def run_all_tests(self):
180
+ """๋ชจ๋“  ํ…Œ์ŠคํŠธ ์‹คํ–‰"""
181
+ print("๐Ÿš€ Lily LLM ํ†ตํ•ฉ ํ…Œ์ŠคํŠธ ์‹œ์ž‘")
182
+ print("=" * 60)
183
+
184
+ try:
185
+ # ๊ธฐ๋ณธ ๊ธฐ๋Šฅ ํ…Œ์ŠคํŠธ
186
+ self.test_rag_functionality()
187
+
188
+ # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ํ…Œ์ŠคํŠธ
189
+ self.test_context_management()
190
+
191
+ # LoRA ๊ด€๋ฆฌ ํ…Œ์ŠคํŠธ
192
+ self.test_lora_management()
193
+
194
+ # ํ†ตํ•ฉ ์ƒ์„ฑ ํ…Œ์ŠคํŠธ
195
+ self.test_integrated_generation()
196
+
197
+ # ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ/๊ฐ€์ ธ์˜ค๊ธฐ ํ…Œ์ŠคํŠธ
198
+ self.test_context_export_import()
199
+
200
+ print("\n๐ŸŽ‰ ๋ชจ๋“  ํ…Œ์ŠคํŠธ๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ์™„๋ฃŒ๋˜์—ˆ์Šต๋‹ˆ๋‹ค!")
201
+
202
+ except Exception as e:
203
+ print(f"\nโŒ ํ…Œ์ŠคํŠธ ์‹คํ–‰ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
204
+ import traceback
205
+ traceback.print_exc()
206
+
207
+ def main():
208
+ """๋ฉ”์ธ ํ•จ์ˆ˜"""
209
+ print("Lily LLM API ํ…Œ์ŠคํŠธ ๋„๊ตฌ")
210
+ print("=" * 40)
211
+
212
+ # ์„œ๋ฒ„ URL ์ž…๋ ฅ
213
+ base_url = input("์„œ๋ฒ„ URL์„ ์ž…๋ ฅํ•˜์„ธ์š” (๊ธฐ๋ณธ๊ฐ’: http://localhost:8001): ").strip()
214
+ if not base_url:
215
+ base_url = "http://localhost:8001"
216
+
217
+ # ํ…Œ์Šคํ„ฐ ์ƒ์„ฑ ๋ฐ ์‹คํ–‰
218
+ tester = LilyLLMTester(base_url)
219
+
220
+ try:
221
+ tester.run_all_tests()
222
+ except KeyboardInterrupt:
223
+ print("\n\nโน๏ธ ํ…Œ์ŠคํŠธ๊ฐ€ ์‚ฌ์šฉ์ž์— ์˜ํ•ด ์ค‘๋‹จ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
224
+ except Exception as e:
225
+ print(f"\n\nโŒ ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
226
+
227
+ if __name__ == "__main__":
228
+ main()