Spaces:

gbrabbit
/

lily_fast_api

Running

App Files Files Community

gbrabbit committed on Aug 22, 2025

Commit

149ff1e

1 Parent(s): f143d67

Auto commit at 22-2025-08 16:26:25

Browse files

Files changed (12) hide show

fix_kanana_target_modules.py +83 -0
inspect_kanana_model.py +95 -0
lily_llm_api/app_v2.py +138 -107
lily_llm_api/models/kanana_1_5_v_3b_instruct.py +37 -17
lily_llm_api/models/polyglot_ko_1_3b_chat.py +4 -0
lily_llm_api/models/polyglot_ko_5_8b_chat.py +5 -1
lily_llm_core/lora_manager.py +13 -0
lily_llm_core/rag_processor.py +4 -4
test_lora_integration.py +93 -0
test_model_selection.py +86 -0
test_model_type_fix.py +90 -0
test_rag_integration.py +1 -0

fix_kanana_target_modules.py ADDED Viewed

	@@ -0,0 +1,83 @@

+#!/usr/bin/env python3
+"""
+Kanana 모델의 정확한 target modules 패턴 찾기
+"""
+import sys
+import os
+from pathlib import Path
+# 프로젝트 루트 경로 추가
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+def find_exact_target_modules():
+    """Print the LoRA target-module candidates of the local Kanana checkpoint.
+    Loads the model from the local model directory, walks ``named_modules()``
+    under ``language_model.model.layers.`` and lists every weight-bearing
+    attention / MLP projection by its full dotted name. Any failure is printed
+    together with a traceback instead of being raised.
+    """
+    print("🔍 Kanana 모델의 정확한 target modules 패턴 찾기...")
+    try:
+        import torch
+        from transformers import AutoModelForVision2Seq
+        model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
+        print(f"📥 모델 로딩 중: {model_path}")
+        # Load strictly from disk; the checkpoint ships custom code.
+        load_options = {
+            "trust_remote_code": True,
+            "local_files_only": True,
+            "torch_dtype": torch.bfloat16,
+        }
+        model = AutoModelForVision2Seq.from_pretrained(model_path, **load_options)
+        print("✅ 모델 로드 성공!")
+        # Collect projection layers that live inside the language-model stack.
+        print("\n🎯 Language Model 모듈 검색:")
+        layer_prefix = "language_model.model.layers."
+        attention_keys = ("q_proj", "k_proj", "v_proj", "o_proj")
+        mlp_keys = ("gate_proj", "up_proj", "down_proj")
+        found = []
+        for qualified_name, submodule in model.named_modules():
+            if not qualified_name.startswith(layer_prefix):
+                continue
+            # Only modules that actually carry a weight are reported.
+            if getattr(submodule, "weight", None) is None:
+                continue
+            class_name = type(submodule).__name__
+            if any(key in qualified_name for key in attention_keys):
+                found.append((qualified_name, class_name, "Attention"))
+            elif any(key in qualified_name for key in mlp_keys):
+                found.append((qualified_name, class_name, "MLP"))
+        # Report the findings.
+        if not found:
+            print("  ❌ language_model에서 target modules를 찾을 수 없음")
+        else:
+            print("  ✅ 발견된 target modules:")
+            for qualified_name, class_name, group in found:
+                print(f"    - {qualified_name} ({class_name}) - {group}")
+            # Emit the names in a form that can be pasted into a Python list.
+            print("\n📋 실제 사용할 target modules:")
+            target_modules = [entry[0] for entry in found]
+            for module_name in target_modules:
+                print(f"    '{module_name}',")
+            print(f"\n🔢 총 {len(target_modules)}개의 target modules 발견")
+        # Drop the model reference and collect garbage before returning.
+        del model
+        import gc
+        gc.collect()
+        print("\n✅ target modules 검색 완료!")
+    except Exception as exc:
+        print(f"❌ target modules 검색 실패: {exc}")
+        import traceback
+        traceback.print_exc()
+if __name__ == "__main__":
+    find_exact_target_modules()

inspect_kanana_model.py ADDED Viewed

	@@ -0,0 +1,95 @@

+#!/usr/bin/env python3
+"""
+Kanana 모델 구조 확인 스크립트
+"""
+import sys
+import os
+from pathlib import Path
+# 프로젝트 루트 경로 추가
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+def inspect_kanana_model():
+    """Print the module structure of the local Kanana checkpoint.
+    Loads the processor and model from the local model directory, then prints
+    (1) every weight-bearing module, (2) modules whose lower-cased name matches
+    a known LoRA target pattern, and (3) the weight shapes found among the first
+    20 entries of ``named_modules()``. Any failure is printed together with a
+    traceback instead of being raised.
+    """
+    print("🔍 Kanana 모델 구조 확인 시작...")
+    try:
+        import torch
+        from transformers import AutoModelForVision2Seq, AutoProcessor
+        model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
+        print(f"📥 모델 로딩 중: {model_path}")
+        # The processor is loaded first, then the model, both strictly from disk.
+        processor = AutoProcessor.from_pretrained(
+            model_path, trust_remote_code=True, local_files_only=True
+        )
+        model = AutoModelForVision2Seq.from_pretrained(
+            model_path,
+            trust_remote_code=True,
+            local_files_only=True,
+            torch_dtype=torch.bfloat16,
+        )
+        print("✅ 모델 로드 성공!")
+        print(f"🔍 모델 타입: {type(model).__name__}")
+        print("🔍 모델 구조:")
+        # 1) Every module that owns a weight.
+        print("\n📋 모든 named_modules:")
+        for qualified_name, submodule in model.named_modules():
+            if getattr(submodule, "weight", None) is not None:
+                print(f"  - {qualified_name}: {type(submodule).__name__}")
+        # 2) Candidates by name pattern; groups are tried in order, first match wins.
+        print("\n🎯 LoRA target modules 후보:")
+        pattern_groups = (
+            (("q_proj", "k_proj", "v_proj", "o_proj"), "Attention"),
+            (("gate_proj", "up_proj", "down_proj"), "MLP"),
+            (("query_key_value", "dense_h_to_4h", "dense_4h_to_h"), "GPTNeoX"),
+            (("fc1", "fc2", "proj"), "Linear"),
+        )
+        candidates = []
+        for qualified_name, submodule in model.named_modules():
+            class_name = type(submodule).__name__
+            lowered = qualified_name.lower()
+            for keys, group in pattern_groups:
+                if any(key in lowered for key in keys):
+                    candidates.append((qualified_name, class_name, group))
+                    break
+        if not candidates:
+            print("  ❌ 일반적인 패턴을 찾을 수 없음")
+        else:
+            print("  ✅ 발견된 target modules:")
+            for qualified_name, class_name, group in candidates:
+                print(f"    - {qualified_name} ({class_name}) - {group}")
+        # 3) Weight shapes among the first 20 named modules.
+        print("\n🔍 첫 번째 레이어 구조:")
+        leading_modules = list(model.named_modules())[:20]
+        for qualified_name, submodule in leading_modules:
+            weight = getattr(submodule, "weight", None)
+            if weight is not None:
+                print(f"  - {qualified_name}: {type(submodule).__name__} (shape: {weight.shape})")
+        # Drop the references and collect garbage before returning.
+        del model
+        del processor
+        import gc
+        gc.collect()
+        print("\n✅ 모델 구조 확인 완료!")
+    except Exception as exc:
+        print(f"❌ 모델 구조 확인 실패: {exc}")
+        import traceback
+        traceback.print_exc()
+if __name__ == "__main__":
+    inspect_kanana_model()

lily_llm_api/app_v2.py CHANGED Viewed

@@ -54,6 +54,11 @@ from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
 # 컨텍스트 관리자 및 LoRA 관리자 추가
 from lily_llm_core.context_manager import get_context_manager, context_manager
 # LoRA 관리자 import (선택적)
 try:
     from lily_llm_core.lora_manager import get_lora_manager, lora_manager
@@ -65,6 +70,124 @@ except ImportError as e:
     lora_manager = None
     get_lora_manager = None
 # ===== lifespan 컨텍스트 매니저 (서버 시작/종료 이벤트) =====
 from contextlib import asynccontextmanager
@@ -81,9 +204,9 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.error(f"❌ CPU 스레드 설정 실패: {e}")
-    # 기본 모델 자동 로드 (polyglot-ko-1.3b-chat)
-    selected_model_id = "polyglot-ko-1.3b-chat"
-    logger.info(f"🚀 서버 시작 시 기본 모델 자동 로드: {selected_model_id}")
     try:
         await load_model_async(selected_model_id)
@@ -110,47 +233,8 @@ async def lifespan(app: FastAPI):
         except Exception as e:
             logger.warning(f"⚠️ 고급 컨텍스트 관리자 설정 실패: {e}")
-        # LoRA 자동 설정 (모델 로드 완료 후)
-        if LORA_AVAILABLE and lora_manager:
-            try:
-                logger.info("🔧 서버 시작 후 LoRA 자동 설정 시작...")
-                # 모델 경로 설정
-                current_model_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
-                logger.info(f"🔍 LoRA 모델 경로: {current_model_path}")
-                # LoRA 기본 모델 로드
-                logger.info("🔧 LoRA 기본 모델 로드 시작...")
-                success = lora_manager.load_base_model(current_model_path, "causal_lm")
-                if success:
-                    logger.info("✅ LoRA 기본 모델 로드 성공")
-                    # LoRA 설정 생성
-                    logger.info("🔧 LoRA 설정 생성 시작...")
-                    lora_config = lora_manager.create_lora_config(
-                        r=16,
-                        lora_alpha=32,
-                        lora_dropout=0.1,
-                        bias="none",
-                        task_type="CAUSAL_LM",
-                        target_modules=["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
-                    )
-                    logger.info("✅ LoRA 설정 생성 완료")
-                    # LoRA 어댑터 적용
-                    logger.info("🔧 LoRA 어댑터 적용 시작...")
-                    adapter_success = lora_manager.apply_lora_to_model("auto_adapter")
-                    if adapter_success:
-                        logger.info("✅ LoRA 어댑터 적용 완료: auto_adapter")
-                        logger.info("🎉 서버 시작 시 LoRA 자동 설정 완료!")
-                    else:
-                        logger.error("❌ LoRA 어댑터 적용 실패")
-                else:
-                    logger.error("❌ LoRA 기본 모델 로드 실패")
-            except Exception as e:
-                logger.error(f"❌ LoRA 자동 설정 중 오류: {e}")
-        else:
-            logger.warning("⚠️ LoRA가 사용 불가능하여 자동 설정 건너뜀")
     except Exception as e:
         logger.error(f"❌ 모델 로드에 실패했습니다: {e}", exc_info=True)
@@ -325,14 +409,14 @@ def select_model_interactive():
         print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")
     while True:
         try:
-            # choice = input(f"\n📝 사용할 모델 번호를 선택하세요 (1-{len(available_models)}): ")
-            # selected_model = available_models[int(choice) - 1]
-            selected_model = available_models[1]
             print(f"\n✅ '{selected_model['name']}' 모델을 선택했습니다.")
             return selected_model['model_id']
         except (ValueError, IndexError):
             print(f"❌ 1에서 {len(available_models)} 사이의 숫자를 입력해주세요.")
-        except KeyboardInterrupt: sys.exit("\n\n👋 프로그램을 종료합니다.")
 # @app.on_event("startup") - FastAPI 최신 버전에서 작동하지 않음
 # startup_event 함수는 lifespan으로 이동됨
@@ -358,7 +442,7 @@ async def load_model_endpoint(model_id: str):
 def load_model_sync(model_id: str):
     """모델 및 관련 프로세서를 동기적으로 로딩 (최종 수정본)"""
-    global model, tokenizer, processor, current_profile
     try:
         if model is not None:
@@ -377,6 +461,9 @@ def load_model_sync(model_id: str):
         # 이제 load_model은 (model, processor)를 반환합니다.
         model, processor = current_profile.load_model()
         # processor에서 tokenizer를 꺼내 전역 변수에 할당합니다.
         if hasattr(processor, 'tokenizer'):
             tokenizer = processor.tokenizer
@@ -386,64 +473,8 @@ def load_model_sync(model_id: str):
         logger.info(f"✅ '{current_profile.display_name}' 모델 로딩 완료!")
-        # LoRA 기본 모델 자동 로드
-        try:
-            if LORA_AVAILABLE and lora_manager:
-                # 현재 로드된 모델 경로 확인
-                current_model_path = None
-                if hasattr(current_profile, 'model_path') and current_profile.model_path:
-                    current_model_path = current_profile.model_path
-                    logger.info(f"🔍 모델 경로 직접 사용: {current_model_path}")
-                elif hasattr(current_profile, 'model_id') and current_profile.model_id:
-                    # 모델 ID를 경로로 변환
-                    model_id = current_profile.model_id
-                    logger.info(f"🔍 모델 ID 감지: {model_id}")
-                    if model_id == "polyglot-ko-1.3b-chat":
-                        current_model_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
-                    elif model_id == "kanana-1.5-v-3b-instruct":
-                        current_model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
-                    elif model_id == "polyglot-ko-5.8b-chat":
-                        current_model_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
-                    logger.info(f"🔍 변환된 모델 경로: {current_model_path}")
-                if current_model_path:
-                    logger.info(f"🔧 LoRA 기본 모델 자동 로드 시작: {current_model_path}")
-                    success = lora_manager.load_base_model(current_model_path, "causal_lm")
-                    if success:
-                        logger.info(f"✅ LoRA 기본 모델 자동 로드 성공: {current_model_path}")
-                        # LoRA 설정 자동 생성
-                        try:
-                            logger.info("🔧 LoRA 설정 자동 생성 시작...")
-                            lora_config = lora_manager.create_lora_config(
-                                r=16,
-                                lora_alpha=32,
-                                lora_dropout=0.1,
-                                bias="none",
-                                task_type="CAUSAL_LM",
-                                target_modules=["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
-                            )
-                            logger.info("✅ LoRA 설정 자동 생성 완료")
-                            # LoRA 어댑터 자동 적용
-                            logger.info("🔧 LoRA 어댑터 자동 적용 시작...")
-                            adapter_success = lora_manager.apply_lora_to_model("auto_adapter")
-                            if adapter_success:
-                                logger.info("✅ LoRA 어댑터 자동 적용 완료: auto_adapter")
-                            else:
-                                logger.error("❌ LoRA 어댑터 자동 적용 실패")
-                        except Exception as e:
-                            logger.error(f"❌ LoRA 설정/어댑터 자동 생성 실패: {e}")
-                    else:
-                        logger.error(f"❌ LoRA 기본 모델 자동 로드 실패: {current_model_path}")
-                else:
-                    logger.warning("⚠️ 현재 모델의 경로를 찾을 수 없어 LoRA 자동 로드 건너뜀")
-            else:
-                logger.info("⚠️ LoRA가 사용 불가능하여 자동 로드 건너뜀")
-        except Exception as e:
-            logger.error(f"❌ LoRA 자동 로드 중 오류 발생: {e}")
     except Exception as e:
         logger.error(f"❌ load_model_sync 실패: {e}")

 # 컨텍스트 관리자 및 LoRA 관리자 추가
 from lily_llm_core.context_manager import get_context_manager, context_manager
+# 전역 변수들
+current_model = None  # 🔄 현재 로드된 모델 인스턴스
+current_profile = None  # 🔄 현재 선택된 모델 프로필
+model_loaded = False  # 🔄 모델 로드 상태
 # LoRA 관리자 import (선택적)
 try:
     from lily_llm_core.lora_manager import get_lora_manager, lora_manager
     lora_manager = None
     get_lora_manager = None
+# ===== 공통 LoRA 설정 함수 =====
+# Known local checkpoints: model_id -> (local directory, LoRA manager model type).
+# Single source of truth for both the path lookup and the model-type decision.
+_LORA_LOCAL_MODELS = {
+    "polyglot-ko-1.3b-chat": ("./lily_llm_core/models/polyglot_ko_1_3b_chat", "causal_lm"),
+    "kanana-1.5-v-3b-instruct": ("./lily_llm_core/models/kanana_1_5_v_3b_instruct", "vision2seq"),
+    "polyglot-ko-5.8b-chat": ("./lily_llm_core/models/polyglot_ko_5_8b_chat", "causal_lm"),
+}
+def setup_lora_for_model(profile, lora_manager):
+    """Attach the default LoRA adapter to the already loaded main model.
+    Args:
+        profile: Model profile object. ``local_path``, ``model_id`` and
+            ``is_local`` are read when present; all of them are optional.
+        lora_manager: LoRA manager instance, or a falsy value when LoRA is
+            not available.
+    Returns:
+        True when the ``auto_adapter`` adapter was applied. False when LoRA is
+        unavailable, no local model path can be determined (including the
+        deployment case where only a hub model name exists), no main model is
+        loaded, the adapter could not be applied, or any exception occurred.
+        Exceptions are logged with a traceback and never propagated.
+    """
+    if not LORA_AVAILABLE or not lora_manager:
+        logger.warning("⚠️ LoRA가 사용 불가능하여 자동 설정 건너뜀")
+        return False
+    try:
+        logger.info("🔧 LoRA 자동 설정 시작...")
+        model_id = getattr(profile, "model_id", None)
+        local_path = getattr(profile, "local_path", None)
+        current_model_path = None
+        model_type = "causal_lm"  # default for every model not listed in the table
+        if local_path:
+            # The profile carries an explicit local path; only the type comes from the table.
+            current_model_path = local_path
+            if model_id:
+                model_type = _LORA_LOCAL_MODELS.get(model_id, (None, "causal_lm"))[1]
+            logger.info(f"🔍 모델 프로필에서 로컬 경로 사용: {current_model_path}")
+            logger.info(f"🔍 결정된 모델 타입: {model_type}")
+        elif model_id:
+            logger.info(f"🔍 모델 ID 기반 경로 결정: {model_id}")
+            if not getattr(profile, "is_local", False):
+                # Deployment: the model comes from the hub, there is no local path to use.
+                logger.info("🔍 배포 환경: LoRA 설정 건너뜀 (HF 모델)")
+                return False
+            current_model_path, model_type = _LORA_LOCAL_MODELS.get(model_id, (None, "causal_lm"))
+            logger.info(f"🔍 결정된 모델 경로: {current_model_path}")
+            logger.info(f"🔍 결정된 모델 타입: {model_type}")
+        if not current_model_path:
+            logger.warning("⚠️ 현재 모델의 경로를 찾을 수 없어 LoRA 자동 로드 건너뜀")
+            return False
+        logger.info(f"🔍 LoRA 모델 경로: {current_model_path}")
+        logger.info(f"🔍 LoRA 모델 타입: {model_type}")
+        # Reuse the main model that is already in memory instead of loading a second copy.
+        logger.info("🔧 기존 메인 모델에 LoRA 직접 적용 시작...")
+        if hasattr(lora_manager, 'base_model') and lora_manager.base_model is None:
+            # Read this module's own global directly. Importing the module by name
+            # from inside itself yields a second module object (with
+            # current_model still None) when the file is executed as __main__.
+            if current_model is None:
+                logger.warning("⚠️ 메인 모델을 찾을 수 없어 LoRA 설정 건너뜀")
+                return False
+            lora_manager.base_model = current_model
+            logger.info("✅ 기존 메인 모델을 LoRA 관리자에 설정 완료")
+        # NOTE(review): a non-None base_model left over from an earlier load is
+        # reused as-is; confirm the manager is reset when the main model is swapped.
+        logger.info("🔧 LoRA 설정 생성 시작...")
+        if model_type == "vision2seq":
+            # Kanana: projections of the first decoder layer of the language model only.
+            target_modules = [
+                "language_model.model.layers.0.self_attn.q_proj",
+                "language_model.model.layers.0.self_attn.k_proj",
+                "language_model.model.layers.0.self_attn.v_proj",
+                "language_model.model.layers.0.self_attn.o_proj",
+                "language_model.model.layers.0.mlp.gate_proj",
+                "language_model.model.layers.0.mlp.up_proj",
+                "language_model.model.layers.0.mlp.down_proj",
+            ]
+        else:
+            # Remaining models use GPT-NeoX style module names.
+            target_modules = ["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
+        # The returned config is not needed here; apply_lora_to_model() takes no
+        # config argument, so the manager presumably keeps it internally.
+        lora_manager.create_lora_config(
+            r=16,
+            lora_alpha=32,
+            lora_dropout=0.1,
+            bias="none",
+            task_type="CAUSAL_LM" if model_type == "causal_lm" else "VISION_2_SEQ",
+            target_modules=target_modules,
+        )
+        logger.info("✅ LoRA 설정 생성 완료")
+        logger.info("🔧 LoRA 어댑터 적용 시작...")
+        if lora_manager.apply_lora_to_model("auto_adapter"):
+            logger.info("✅ LoRA 어댑터 적용 완료: auto_adapter")
+            logger.info("🎉 LoRA 자동 설정 완료!")
+            return True
+        logger.error("❌ LoRA 어댑터 적용 실패")
+        return False
+    except Exception as e:
+        # Best effort: LoRA setup must never abort model loading, but keep the traceback.
+        logger.error(f"❌ LoRA 자동 설정 중 오류: {e}", exc_info=True)
+        return False
 # ===== lifespan 컨텍스트 매니저 (서버 시작/종료 이벤트) =====
 from contextlib import asynccontextmanager
     except Exception as e:
         logger.error(f"❌ CPU 스레드 설정 실패: {e}")
+    # 🔄 모델 선택 복원: 사용자가 모델을 선택할 수 있도록
+    selected_model_id = select_model_interactive()
+    logger.info(f"🚀 서버 시작 시 선택된 모델: {selected_model_id}")
     try:
         await load_model_async(selected_model_id)
         except Exception as e:
             logger.warning(f"⚠️ 고급 컨텍스트 관리자 설정 실패: {e}")
+        # 🔄 LoRA 자동 설정은 load_model_async 내부에서 이미 처리됨
+        # setup_lora_for_model(current_profile, lora_manager)  # 중복 호출 제거
     except Exception as e:
         logger.error(f"❌ 모델 로드에 실패했습니다: {e}", exc_info=True)
         print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")
     while True:
         try:
+            choice = input(f"\n📝 사용할 모델 번호를 선택하세요 (1-{len(available_models)}): ")
+            selected_model = available_models[int(choice) - 1]
             print(f"\n✅ '{selected_model['name']}' 모델을 선택했습니다.")
             return selected_model['model_id']
         except (ValueError, IndexError):
             print(f"❌ 1에서 {len(available_models)} 사이의 숫자를 입력해주세요.")
+        except KeyboardInterrupt:
+            sys.exit("\n\n👋 프로그램을 종료합니다.")
 # @app.on_event("startup") - FastAPI 최신 버전에서 작동하지 않음
 # startup_event 함수는 lifespan으로 이동됨
 def load_model_sync(model_id: str):
     """모델 및 관련 프로세서를 동기적으로 로딩 (최종 수정본)"""
+    global model, tokenizer, processor, current_profile, current_model
     try:
         if model is not None:
         # 이제 load_model은 (model, processor)를 반환합니다.
         model, processor = current_profile.load_model()
+        # 🔄 전역 변수에 모델 설정 (LoRA에서 사용)
+        current_model = model
         # processor에서 tokenizer를 꺼내 전역 변수에 할당합니다.
         if hasattr(processor, 'tokenizer'):
             tokenizer = processor.tokenizer
         logger.info(f"✅ '{current_profile.display_name}' 모델 로딩 완료!")
+        # 🔄 LoRA 기본 모델 자동 로드 (공통 함수 사용)
+        setup_lora_for_model(current_profile, lora_manager)
     except Exception as e:
         logger.error(f"❌ load_model_sync 실패: {e}")

lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED Viewed

@@ -25,16 +25,21 @@ class Kanana15V3bInstructProfile:
         # 환경 감지
         self.is_local = self._detect_local_environment()
-        # 모델 경로 설정
         if self.is_local:
-            self.model_name = "gbrabbit/lily-math-model"  # 로컬에서도 HF 모델명 사용
             self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
             self.display_name = "kanana-1.5-v-3b-instruct"
         else:
-            self.model_name = "gbrabbit/lily-math-model"  # Hugging Face Hub 모델 경로
-            self.local_path = None  # 서버에서는 로컬 경로 사용 안함
             self.display_name = "kanana-1.5-v-3b-instruct"
         self.description = "카카오 멀티모달 모델 (3.6B) - Math RAG 특화"
         self.language = "ko"
         self.model_size = "3.6B"
@@ -97,15 +102,19 @@ class Kanana15V3bInstructProfile:
             logger.error(f"❌ 환경변수 로드 실패: {e}")
     def load_model(self) -> Tuple[Any, Any]:
-        """모델 로드 (공식적인 방법 + 절대 경로 sys.path 수정 최종본)"""
-        logger.info(f"📥 {self.display_name} 모델 로드 중 (공식 방법)...")
-        # self.local_path를 절대 경로로 변환
-        absolute_model_path = os.path.abspath(self.local_path)
-        use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
-        # model_path 변수에 절대 경로를 사용
-        model_path = absolute_model_path if use_local else self.model_name
         try:
             from transformers import AutoModelForVision2Seq, AutoProcessor
@@ -119,16 +128,17 @@ class Kanana15V3bInstructProfile:
             processor = AutoProcessor.from_pretrained(
                 model_path,
                 trust_remote_code=True,
-                local_files_only=use_local
             )
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
             # dtype 설정 최적화 - CPU에서는 float32 사용
             if device == 'cuda':
-                selected_dtype = torch.float16  # GPU에서는 float16으로 메모리 절약
             else:
-                selected_dtype = torch.float32  # CPU에서는 float32로 안정성 확보
             logger.info(f"🔧 선택된 dtype: {selected_dtype} (device: {device})")
@@ -163,8 +173,16 @@ class Kanana15V3bInstructProfile:
             "top_p": 0.95,
             "repetition_penalty": 1.1,
             "no_repeat_ngram_size": 3,
-            "pad_token_id": None,
-            "eos_token_id": None,
         }
     def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
@@ -220,7 +238,9 @@ class Kanana15V3bInstructProfile:
         # 일반적인 프롬프트 패턴 제거 시도
         patterns_to_remove = [
             "<|im_start|>user\n",
             "<|im_end|>",
             "<image>",
             "user\n",
             "assistant\n"

         # 환경 감지
         self.is_local = self._detect_local_environment()
+        # 🔄 모델 경로 설정 (로컬/배포 환경 모두 지원)
+        self.model_name = "kakaocorp/kanana-1.5-v-3b-instruct"
         if self.is_local:
+            # 로컬 환경: 로컬 경로 우선, 없으면 HF에서 다운로드
             self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
             self.display_name = "kanana-1.5-v-3b-instruct"
         else:
+            # 배포 환경: HF 모델명 사용, 로컬 경로는 None
+            self.local_path = None
             self.display_name = "kanana-1.5-v-3b-instruct"
+        # 🔄 모델 ID 추가 (LoRA 및 기타 설정에서 사용)
+        self.model_id = "kanana-1.5-v-3b-instruct"
         self.description = "카카오 멀티모달 모델 (3.6B) - Math RAG 특화"
         self.language = "ko"
         self.model_size = "3.6B"
             logger.error(f"❌ 환경변수 로드 실패: {e}")
     def load_model(self) -> Tuple[Any, Any]:
+        """모델 로드 (로컬/배포 환경 모두 지원)"""
+        logger.info(f"📥 {self.display_name} 모델 로드 중...")
+        # 🔄 환경에 따른 모델 경로 결정
+        if self.is_local and self.local_path:
+            # 로컬 환경: 로컬 경로 확인
+            absolute_model_path = os.path.abspath(self.local_path)
+            use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
+            model_path = absolute_model_path if use_local else self.model_name
+        else:
+            # 배포 환경: HF 모델명 사용
+            use_local = False
+            model_path = self.model_name
         try:
             from transformers import AutoModelForVision2Seq, AutoProcessor
             processor = AutoProcessor.from_pretrained(
                 model_path,
                 trust_remote_code=True,
+                local_files_only=use_local,
+                use_fast=True  # 🔄 빠른 이미지 프로세서 사용 (경고 제거)
             )
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
             # dtype 설정 최적화 - CPU에서는 float32 사용
             if device == 'cuda':
+                selected_dtype = torch.bfloat16  # GPU에서는 float16으로 메모리 절약
             else:
+                selected_dtype = torch.bfloat16  # CPU에서는 float32로 안정성 확보
             logger.info(f"🔧 선택된 dtype: {selected_dtype} (device: {device})")
             "top_p": 0.95,
             "repetition_penalty": 1.1,
             "no_repeat_ngram_size": 3,
+            "pad_token_id": 128001,
+            "eos_token_id": 128009,
+            "bos_token_id": 128000,
+            "use_cache": True,
+            # "early_stopping": False,
+            # "num_beams": 1,
+            # "num_return_sequences": 1,
+            # "return_full_text": False,
+            # "return_dict": False,
+            # "return_dict_in_generate": False,
         }
     def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
         # 일반적인 프롬프트 패턴 제거 시도
         patterns_to_remove = [
             "<|im_start|>user\n",
+            "<|im_start|>assistant\n",
             "<|im_end|>",
+            "<|im_in_end|>",
             "<image>",
             "user\n",
             "assistant\n"

lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED Viewed

@@ -24,6 +24,10 @@ class PolyglotKo13bChatProfile:
         self.model_name = "heegyu/polyglot-ko-1.3b-chat"
         self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
         self.display_name = "Polyglot-ko-1.3b-chat"
         self.description = "한국어 채팅 전용 경량 모델 (1.3B)"
         self.language = "ko"
         self.model_size = "1.3B"

         self.model_name = "heegyu/polyglot-ko-1.3b-chat"
         self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
         self.display_name = "Polyglot-ko-1.3b-chat"
+        # 🔄 모델 ID 추가 (LoRA 및 기타 설정에서 사용)
+        self.model_id = "polyglot-ko-1.3b-chat"
         self.description = "한국어 채팅 전용 경량 모델 (1.3B)"
         self.language = "ko"
         self.model_size = "1.3B"

lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED Viewed

@@ -21,6 +21,10 @@ class PolyglotKo58bChatProfile:
         self.model_name = "heegyu/polyglot-ko-5.8b-chat"
         self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
         self.display_name = "Polyglot-ko-5.8b-chat"
         self.description = "한국어 채팅 전용 고성능 모델 (5.8B)"
         self.language = "ko"
         self.model_size = "5.8B"
@@ -85,7 +89,7 @@ class PolyglotKo58bChatProfile:
             # CPU에서는 float32가 더 안정적, CUDA에서는 float16 사용
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
-            selected_dtype = torch.float16 if device == 'cuda' else torch.bfloat16
             model = AutoModelForCausalLM.from_pretrained(
                 model_path,

         self.model_name = "heegyu/polyglot-ko-5.8b-chat"
         self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
         self.display_name = "Polyglot-ko-5.8b-chat"
+        # 🔄 모델 ID 추가 (LoRA 및 기타 설정에서 사용)
+        self.model_id = "polyglot-ko-5.8b-chat"
         self.description = "한국어 채팅 전용 고성능 모델 (5.8B)"
         self.language = "ko"
         self.model_size = "5.8B"
             # CPU에서는 float32가 더 안정적, CUDA에서는 float16 사용
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            selected_dtype = torch.bfloat16 if device == 'cuda' else torch.bfloat16
             model = AutoModelForCausalLM.from_pretrained(
                 model_path,

lily_llm_core/lora_manager.py CHANGED Viewed

@@ -156,6 +156,16 @@ class LoRAManager:
                     torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                     device_map="auto" if self.device == "cuda" else None
                 )
             else:
                 raise ValueError(f"지원하지 않는 모델 타입: {model_type}")
@@ -190,6 +200,9 @@ class LoRAManager:
             # 직접 TaskType 사용 (문자열 변환 제거)
             if task_type == "CAUSAL_LM":
                 task_type_enum = TaskType.CAUSAL_LM
             elif task_type == "SEQ_2_SEQ_LM":
                 task_type_enum = TaskType.SEQ_2_SEQ_LM
             elif task_type == "SEQUENCE_CLASSIFICATION":

                     torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                     device_map="auto" if self.device == "cuda" else None
                 )
+            elif model_type == "vision2seq":
+                # 🔄 Vision2Seq 모델 지원 추가 (kanana 등)
+                from transformers import AutoModelForVision2Seq
+                self.base_model = AutoModelForVision2Seq.from_pretrained(
+                    str(model_path),
+                    trust_remote_code=True,
+                    local_files_only=True,
+                    torch_dtype=torch.bfloat16 if self.device == "cuda" else torch.bfloat16,
+                    device_map="auto" if self.device == "cuda" else None
+                )
             else:
                 raise ValueError(f"지원하지 않는 모델 타입: {model_type}")
             # 직접 TaskType 사용 (문자열 변환 제거)
             if task_type == "CAUSAL_LM":
                 task_type_enum = TaskType.CAUSAL_LM
+            elif task_type == "VISION_2_SEQ":
+                # 🔄 Vision2Seq 모델 지원 추가
+                task_type_enum = TaskType.SEQ_2_SEQ_LM  # Vision2Seq는 SEQ_2_SEQ_LM과 유사
             elif task_type == "SEQ_2_SEQ_LM":
                 task_type_enum = TaskType.SEQ_2_SEQ_LM
             elif task_type == "SEQUENCE_CLASSIFICATION":

lily_llm_core/rag_processor.py CHANGED Viewed

@@ -245,7 +245,7 @@ class RAGProcessor:
                 "context": "",
                 "sources": []
             }
     def _generate_text_response(self, query: str, text_docs: List[Document],
                               llm_model, image_files: List[str]) -> Dict[str, Any]:
         """텍스트 기반 응답 생성"""
@@ -255,8 +255,8 @@ class RAGProcessor:
             # 프롬프트 생성
             prompt = f"""
-            질문: {query}
             참고 문서:
             {text_context}
@@ -397,7 +397,7 @@ class RAGProcessor:
                 "document_id": document_id,
                 "error": str(e)
             }
     def get_performance_stats(self) -> Dict[str, Any]:
         """성능 통계 반환"""
         try:

                 "context": "",
                 "sources": []
             }
     def _generate_text_response(self, query: str, text_docs: List[Document],
                               llm_model, image_files: List[str]) -> Dict[str, Any]:
         """텍스트 기반 응답 생성"""
             # 프롬프트 생성
             prompt = f"""
+질문: {query}
             참고 문서:
             {text_context}
                 "document_id": document_id,
                 "error": str(e)
             }
     def get_performance_stats(self) -> Dict[str, Any]:
         """성능 통계 반환"""
         try:

test_lora_integration.py ADDED Viewed

	@@ -0,0 +1,93 @@

+#!/usr/bin/env python3
+"""
+LoRA 통합 및 모델 타입 지원 테스트 스크립트
+"""
+import sys
+import os
+from pathlib import Path
+# 프로젝트 루트 경로 추가
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
def test_lora_integration():
    """Smoke-test model profile loading and the shared LoRA setup helper.

    For every entry returned by ``list_available_models()`` the profile is
    loaded, its main attributes are printed, and the model path / loader type
    that LoRA setup would use is simulated. Afterwards
    ``setup_lora_for_model`` is called for the first available model with
    ``None`` as the LoRA manager and the result is printed.

    Everything is reported through ``print``. Failures are caught and printed
    instead of being raised, and the function returns ``None``.
    """
    # Local checkpoints this script knows about: model id -> (path, loader type).
    known_local_models = {
        "polyglot-ko-1.3b-chat": (
            "./lily_llm_core/models/polyglot_ko_1_3b_chat",
            "causal_lm",
        ),
        "kanana-1.5-v-3b-instruct": (
            "./lily_llm_core/models/kanana_1_5_v_3b_instruct",
            "vision2seq",
        ),
        "polyglot-ko-5.8b-chat": (
            "./lily_llm_core/models/polyglot_ko_5_8b_chat",
            "causal_lm",
        ),
    }
    # Used when a local model id is missing from the table above: no path,
    # default loader type.
    unknown_local_model = (None, "causal_lm")

    print("🔍 LoRA 통합 테스트 시작...")
    try:
        # Imported lazily so that an import failure is reported by the
        # handler at the bottom instead of aborting the script.
        from lily_llm_api.models import get_model_profile, list_available_models
        available_models = list_available_models()
        print(f"📋 사용 가능한 모델: {len(available_models)}개")
        for model_info in available_models:
            model_id = model_info['model_id']
            print(f"\n📝 모델: {model_info['name']} ({model_id})")
            try:
                profile = get_model_profile(model_id)
                print(f"  ✅ 프로필 로드 성공")
                print(f"  - display_name: {getattr(profile, 'display_name', 'N/A')}")
                print(f"  - model_id: {getattr(profile, 'model_id', 'N/A')}")
                print(f"  - local_path: {getattr(profile, 'local_path', 'N/A')}")
                print(f"  - is_local: {getattr(profile, 'is_local', 'N/A')}")
                print(f"  - multimodal: {getattr(profile, 'multimodal', 'N/A')}")
                # Simulate the LoRA path resolution.
                print(f"  🔍 LoRA 경로 시뮬레이션:")
                # Reset for every model. Previously these names were only
                # bound for the three known ids, so any other local model
                # either raised UnboundLocalError (misreported below as a
                # profile load failure) or reused the previous model's values.
                current_model_path = None
                model_type = "causal_lm"
                if getattr(profile, 'local_path', None):
                    current_model_path = profile.local_path
                    print(f"    - 로컬 경로 직접 사용: {current_model_path}")
                elif getattr(profile, 'model_id', None):
                    model_id = profile.model_id
                    print(f"    - 모델 ID 기반: {model_id}")
                    if getattr(profile, 'is_local', False):
                        # Local environment: look the checkpoint up by id.
                        current_model_path, model_type = known_local_models.get(
                            model_id, unknown_local_model
                        )
                        print(f"    - 로컬 환경 경로: {current_model_path}")
                        print(f"    - 모델 타입: {model_type}")
                    else:
                        # Deployed environment: LoRA setup is skipped, so the
                        # path stays None.
                        print(f"    - 배포 환경: LoRA 설정 건너뜀")
            except Exception as e:
                # Best-effort script: report the failure and continue with
                # the next model.
                print(f"  ❌ 프로필 로드 실패: {e}")
        print("\n" + "="*50)
        print("🎯 공통 LoRA 설정 함수 테스트")
        print("="*50)
        # Exercise the shared helper.
        try:
            from lily_llm_api.app_v2 import setup_lora_for_model
            print("✅ 공통 LoRA 설정 함수 import 성공")
            # Use the first available model as the test subject.
            if available_models:
                test_model_id = available_models[0]['model_id']
                test_profile = get_model_profile(test_model_id)
                print(f"📝 테스트 모델: {test_profile.display_name}")
                # Call the helper without a LoRA manager.
                result = setup_lora_for_model(test_profile, None)
                print(f"🔍 LoRA 매니저 없음 테스트 결과: {result}")
        except Exception as e:
            print(f"❌ 공통 함수 테스트 실패: {e}")
    except Exception as e:
        print(f"❌ 전체 테스트 실패: {e}")


if __name__ == "__main__":
    test_lora_integration()

test_model_selection.py ADDED Viewed

	@@ -0,0 +1,86 @@

+#!/usr/bin/env python3
+"""
+모델 선택 및 LoRA 경로 설정 테스트 스크립트
+"""
+import sys
+import os
+from pathlib import Path
+# 프로젝트 루트 경로 추가
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
+from lily_llm_api.models import get_model_profile, list_available_models
def test_model_profiles():
    """Check model profiles for required attributes and simulate a selection.

    First, every model returned by ``list_available_models()`` has its
    profile loaded and printed, and any of ``model_id``, ``local_path`` or
    ``display_name`` that is missing from the profile is reported. Then the
    models are listed with 1-based numbers, the first one is treated as the
    selected model, and the path that LoRA setup would use for it is resolved
    and printed.

    All output goes through ``print``; the function returns ``None``.
    """
    # Attributes every profile is expected to expose.
    required_attrs = ('model_id', 'local_path', 'display_name')
    # Fallback checkpoint locations, used when a profile has no local_path.
    fallback_paths = {
        "polyglot-ko-1.3b-chat": "./lily_llm_core/models/polyglot_ko_1_3b_chat",
        "kanana-1.5-v-3b-instruct": "./lily_llm_core/models/kanana_1_5_v_3b_instruct",
        "polyglot-ko-5.8b-chat": "./lily_llm_core/models/polyglot_ko_5_8b_chat",
    }

    print("🔍 모델 프로필 테스트 시작...")
    available_models = list_available_models()
    print(f"📋 사용 가능한 모델: {len(available_models)}개")

    for model_info in available_models:
        model_id = model_info['model_id']
        print(f"\n📝 모델: {model_info['name']} ({model_id})")
        try:
            profile = get_model_profile(model_id)
            print(f"  ✅ 프로필 로드 성공")
            print(f"  - display_name: {getattr(profile, 'display_name', 'N/A')}")
            print(f"  - model_id: {getattr(profile, 'model_id', 'N/A')}")
            print(f"  - local_path: {getattr(profile, 'local_path', 'N/A')}")
            print(f"  - multimodal: {getattr(profile, 'multimodal', 'N/A')}")
            # Collect whichever required attributes the profile lacks.
            missing_attrs = []
            for attr_name in required_attrs:
                if not hasattr(profile, attr_name):
                    missing_attrs.append(attr_name)
            if not missing_attrs:
                print(f"  ✅ 모든 필수 속성 존재")
            else:
                print(f"  ❌ 누락된 속성: {missing_attrs}")
        except Exception as e:
            print(f"  ❌ 프로필 로드 실패: {e}")

    print("\n" + "="*50)
    print("🎯 모델 선택 시뮬레이션")
    print("="*50)
    # Numbered menu of the available models.
    for i, model_info in enumerate(available_models, 1):
        print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")

    # Nothing to select from: the simulation ends here.
    if not available_models:
        return

    # Pretend the user picked the first entry.
    selected_model = available_models[0]
    selected_model_id = selected_model['model_id']
    print(f"\n📝 선택된 모델: {selected_model['name']} ({selected_model_id})")

    # Resolve the path LoRA setup would use for the selected model.
    profile = get_model_profile(selected_model_id)
    current_model_path = None
    profile_local_path = getattr(profile, 'local_path', None)
    if profile_local_path:
        current_model_path = profile_local_path
        print(f"🔍 모델 프로필에서 경로 직접 사용: {current_model_path}")
    elif getattr(profile, 'model_id', None):
        model_id = profile.model_id
        print(f"🔍 모델 ID 기반 경로 결정: {model_id}")
        # Unknown ids leave the path as None.
        current_model_path = fallback_paths.get(model_id)
        print(f"🔍 결정된 모델 경로: {current_model_path}")

    if not current_model_path:
        print(f"❌ LoRA 경로 결정 실패")
    else:
        print(f"✅ LoRA 경로 결정 성공: {current_model_path}")


if __name__ == "__main__":
    test_model_profiles()

test_model_type_fix.py ADDED Viewed

	@@ -0,0 +1,90 @@

+#!/usr/bin/env python3
+"""
+모델 타입 설정 테스트 스크립트
+"""
+import sys
+import os
+from pathlib import Path
+# 프로젝트 루트 경로 추가
+project_root = Path(__file__).parent
+sys.path.insert(0, str(project_root))
def test_model_type_detection():
    """Simulate how a model path and loader type are chosen for each profile.

    For every model returned by ``list_available_models()`` the profile is
    loaded and printed, then a path and a model type are derived:

    * a truthy ``local_path`` is used directly, and the type is
      ``"vision2seq"`` for ``kanana-1.5-v-3b-instruct`` and ``"causal_lm"``
      for any other truthy model id;
    * otherwise, for a profile with a truthy ``model_id`` and ``is_local``,
      path and type come from a fixed table of known ids;
    * otherwise no path is resolved.

    Results are reported through ``print``. Errors are caught and printed,
    and the function returns ``None``.
    """
    # Known local checkpoints: model id -> (path, loader type).
    local_checkpoints = {
        "polyglot-ko-1.3b-chat": (
            "./lily_llm_core/models/polyglot_ko_1_3b_chat",
            "causal_lm",
        ),
        "kanana-1.5-v-3b-instruct": (
            "./lily_llm_core/models/kanana_1_5_v_3b_instruct",
            "vision2seq",
        ),
        "polyglot-ko-5.8b-chat": (
            "./lily_llm_core/models/polyglot_ko_5_8b_chat",
            "causal_lm",
        ),
    }

    print("🔍 모델 타입 감지 테스트 시작...")
    try:
        from lily_llm_api.models import get_model_profile, list_available_models
        available_models = list_available_models()
        print(f"📋 사용 가능한 모델: {len(available_models)}개")
        for model_info in available_models:
            model_id = model_info['model_id']
            print(f"\n📝 모델: {model_info['name']} ({model_id})")
            try:
                profile = get_model_profile(model_id)
                print(f"  ✅ 프로필 로드 성공")
                print(f"  - display_name: {getattr(profile, 'display_name', 'N/A')}")
                print(f"  - model_id: {getattr(profile, 'model_id', 'N/A')}")
                print(f"  - local_path: {getattr(profile, 'local_path', 'N/A')}")
                print(f"  - is_local: {getattr(profile, 'is_local', 'N/A')}")

                # Model type detection simulation.
                print(f"  🔍 모델 타입 감지 시뮬레이션:")
                current_model_path = None
                model_type = "causal_lm"  # default loader type

                profile_local_path = getattr(profile, 'local_path', None)
                if profile_local_path:
                    # An explicit local path wins; the type still depends on
                    # the model id when one is set.
                    current_model_path = profile_local_path
                    if getattr(profile, 'model_id', None):
                        model_id = profile.model_id
                        model_type = (
                            "vision2seq"
                            if model_id == "kanana-1.5-v-3b-instruct"
                            else "causal_lm"
                        )
                    print(f"    - 로컬 경로 사용: {current_model_path}")
                    print(f"    - 결정된 모델 타입: {model_type}")
                elif getattr(profile, 'model_id', None):
                    # No local path: fall back to the model id.
                    model_id = profile.model_id
                    print(f"    - 모델 ID 기반: {model_id}")
                    if not getattr(profile, 'is_local', False):
                        print(f"    - 배포 환경: LoRA 설정 건너뜀")
                        current_model_path = None
                    else:
                        # Ids missing from the table keep the defaults
                        # assigned above.
                        current_model_path, model_type = local_checkpoints.get(
                            model_id, (current_model_path, model_type)
                        )
                        print(f"    - 로컬 환경 경로: {current_model_path}")
                        print(f"    - 모델 타입: {model_type}")

                # Final verdict for this model.
                if not current_model_path:
                    print(f"    ❌ 최종 결과: 경로 없음")
                else:
                    print(f"    ✅ 최종 결과: 경로={current_model_path}, 타입={model_type}")
            except Exception as e:
                print(f"  ❌ 프로필 로드 실패: {e}")
    except Exception as e:
        print(f"❌ 전체 테스트 실패: {e}")


if __name__ == "__main__":
    test_model_type_detection()

test_rag_integration.py CHANGED Viewed

	@@ -267,3 +267,4 @@ if __name__ == "__main__":
267	print("\n테스트 완료! 🎉")
268
269


267	print("\n테스트 완료! 🎉")
268
269
270	+