gbrabbit committed on
Commit
ca781ff
·
1 Parent(s): eddb502

Auto commit at 2025-08-09 20:34:30

Browse files
lily_llm_api/app_v2.py CHANGED
@@ -176,7 +176,7 @@ def select_model_interactive():
176
  try:
177
  # choice = input(f"\nπŸ“ μ‚¬μš©ν•  λͺ¨λΈ 번호λ₯Ό μ„ νƒν•˜μ„Έμš” (1-{len(available_models)}): ")
178
  # selected_model = available_models[int(choice) - 1]
179
- selected_model = available_models[1]
180
  print(f"\nβœ… '{selected_model['name']}' λͺ¨λΈμ„ μ„ νƒν–ˆμŠ΅λ‹ˆλ‹€.")
181
  return selected_model['model_id']
182
  except (ValueError, IndexError):
@@ -292,26 +292,55 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
292
  for key in all_image_metas[0].keys():
293
  combined_image_metas[key] = [meta[key] for meta in all_image_metas]
294
 
295
- # --- 2. ν”„λ‘¬ν”„νŠΈ ꡬ성 (이미지 μœ λ¬΄μ— 관계없이 곡톡 μ‹€ν–‰) ---
296
  image_tokens = "<image>" * len(all_pixel_values)
297
- # ν…μŠ€νŠΈμ™€ λ©€ν‹°λͺ¨λ‹¬ λͺ¨λ‘ λ™μΌν•œ ν”„λ‘¬ν”„νŠΈ ν…œν”Œλ¦Ώ μ‚¬μš©
298
  formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"
299
 
300
- # --- 3. ν† ν¬λ‚˜μ΄μ§• (곡톡 μ‹€ν–‰) ---
301
- # ν…μŠ€νŠΈμ™€ λ©€ν‹°λͺ¨λ‹¬ λͺ¨λ‘ λ™μΌν•œ μ»€μŠ€ν…€ ν† ν¬λ‚˜μ΄μ € ν•¨μˆ˜ μ‚¬μš©
302
- inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
303
-
304
- input_ids = inputs['input_ids'].unsqueeze(0).to(model.device)
305
- attention_mask = inputs['attention_mask'].unsqueeze(0).to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  # --- 4. λͺ¨λΈ 생성 (곡톡 μ‹€ν–‰) ---
308
  gen_config = current_profile.get_generation_config()
 
 
 
309
 
310
  # max_length λ“± μ‚¬μš©μž μ§€μ • νŒŒλΌλ―Έν„°κ°€ 있으면 gen_config에 반영
311
  if max_length is not None: gen_config['max_new_tokens'] = max_length
312
  if temperature is not None: gen_config['temperature'] = temperature
313
  if top_p is not None: gen_config['top_p'] = top_p
314
  if do_sample is not None: gen_config['do_sample'] = do_sample
 
 
 
 
 
315
 
316
  with torch.no_grad():
317
  if image_processed:
@@ -364,8 +393,17 @@ async def generate(prompt: str = Form(...),
364
  image_data = await img.read()
365
  image_data_list.append(image_data)
366
 
367
- result = await loop.run_in_executor(executor, generate_sync, prompt, image_data_list,
368
- max_length, temperature, top_p, do_sample)
 
 
 
 
 
 
 
 
 
369
 
370
  processing_time = time.time() - start_time
371
  logger.info(f"βœ… 생성 μ™„λ£Œ ({processing_time:.2f}초), 이미지 처리: {result['image_processed']}")
 
176
  try:
177
  # choice = input(f"\nπŸ“ μ‚¬μš©ν•  λͺ¨λΈ 번호λ₯Ό μ„ νƒν•˜μ„Έμš” (1-{len(available_models)}): ")
178
  # selected_model = available_models[int(choice) - 1]
179
+ selected_model = available_models[0]
180
  print(f"\nβœ… '{selected_model['name']}' λͺ¨λΈμ„ μ„ νƒν–ˆμŠ΅λ‹ˆλ‹€.")
181
  return selected_model['model_id']
182
  except (ValueError, IndexError):
 
292
  for key in all_image_metas[0].keys():
293
  combined_image_metas[key] = [meta[key] for meta in all_image_metas]
294
 
295
+ # --- 2. ν”„λ‘¬ν”„νŠΈ ꡬ성 ---
296
  image_tokens = "<image>" * len(all_pixel_values)
297
+ # Kanana κΈ°λ³Έ 포맷. ν…μŠ€νŠΈ-only λͺ¨λΈμ€ profile.format_prompt둜 λŒ€μ²΄λ¨
298
  formatted_prompt = f"<|im_start|>user\n{image_tokens}{prompt}<|im_end|>\n<|im_start|>assistant\n"
299
 
300
+ # --- 3. ν† ν¬λ‚˜μ΄μ§• ---
301
+ if hasattr(tokenizer, 'encode_prompt'):
302
+ inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
303
+ input_ids = inputs['input_ids']
304
+ attention_mask = inputs['attention_mask']
305
+ else:
306
+ # ν…μŠ€νŠΈ-only λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ μ‚¬μš©
307
+ if not getattr(current_profile, 'multimodal', False) and hasattr(current_profile, 'format_prompt'):
308
+ formatted_prompt = current_profile.format_prompt(prompt)
309
+ inputs = tokenizer(
310
+ formatted_prompt,
311
+ return_tensors="pt",
312
+ padding=True,
313
+ truncation=True,
314
+ max_length=256,
315
+ )
316
+ if 'token_type_ids' in inputs:
317
+ del inputs['token_type_ids']
318
+ input_ids = inputs['input_ids']
319
+ attention_mask = inputs['attention_mask']
320
+
321
+ if input_ids.dim() == 1:
322
+ input_ids = input_ids.unsqueeze(0)
323
+ if attention_mask.dim() == 1:
324
+ attention_mask = attention_mask.unsqueeze(0)
325
+ input_ids = input_ids.to(model.device)
326
+ attention_mask = attention_mask.to(model.device)
327
 
328
  # --- 4. λͺ¨λΈ 생성 (곡톡 μ‹€ν–‰) ---
329
  gen_config = current_profile.get_generation_config()
330
+ # CPUμ—μ„œ κ³Όλ„ν•œ max_new_tokensλŠ” λŒ€κΈ° μ‹œκ°„μ„ 크게 늘림 β†’ κΈ°λ³Έ μƒν•œμ„ 보수적으둜 μ‘°μ •
331
+ if gen_config.get('max_new_tokens', 256) > 128 and (not torch.cuda.is_available()):
332
+ gen_config['max_new_tokens'] = 128
333
 
334
  # max_length λ“± μ‚¬μš©μž μ§€μ • νŒŒλΌλ―Έν„°κ°€ 있으면 gen_config에 반영
335
  if max_length is not None: gen_config['max_new_tokens'] = max_length
336
  if temperature is not None: gen_config['temperature'] = temperature
337
  if top_p is not None: gen_config['top_p'] = top_p
338
  if do_sample is not None: gen_config['do_sample'] = do_sample
339
+ # pad/eos 보완 (일뢀 ν† ν¬λ‚˜μ΄μ €λŠ” pad_token λ―Έμ •μ˜)
340
+ if gen_config.get('pad_token_id') is None and hasattr(tokenizer, 'pad_token_id'):
341
+ gen_config['pad_token_id'] = tokenizer.pad_token_id or tokenizer.eos_token_id
342
+ if gen_config.get('eos_token_id') is None and hasattr(tokenizer, 'eos_token_id'):
343
+ gen_config['eos_token_id'] = tokenizer.eos_token_id
344
 
345
  with torch.no_grad():
346
  if image_processed:
 
393
  image_data = await img.read()
394
  image_data_list.append(image_data)
395
 
396
+ # 단일 μ‹€ν–‰ 보μž₯: generate_syncλŠ” 였직 ν•œ 번만 호좜
397
+ result = await loop.run_in_executor(
398
+ executor,
399
+ generate_sync,
400
+ prompt,
401
+ image_data_list,
402
+ max_length,
403
+ temperature,
404
+ top_p,
405
+ do_sample,
406
+ )
407
 
408
  processing_time = time.time() - start_time
409
  logger.info(f"βœ… 생성 μ™„λ£Œ ({processing_time:.2f}초), 이미지 처리: {result['image_processed']}")
lily_llm_api/models/__init__.py CHANGED
@@ -15,12 +15,6 @@ from .polyglot_ko_5_8b_chat import PolyglotKo58bChatProfile
15
 
16
  # μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈ ν”„λ‘œν•„λ“€
17
  AVAILABLE_MODELS = {
18
- # "polyglot-ko-1.3b": PolyglotKo13bProfile,
19
- # "dialogpt-medium": DialoGPTMediumProfile,
20
- # "kanana-1.5-2.1b-instruct": Kanana15V21bInstructProfile,
21
- # "kanana-nano-2.1b-instruct": KananaNano21bInstructProfile,
22
- # "mistral-7b-instruct": Mistral7bInstructProfile,
23
- # "polyglot-ko-5.8b": PolyglotKo58bProfile,
24
  "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
25
  "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
26
  "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
 
15
 
16
  # μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈ ν”„λ‘œν•„λ“€
17
  AVAILABLE_MODELS = {
 
 
 
 
 
 
18
  "polyglot-ko-1.3b-chat": PolyglotKo13bChatProfile,
19
  "kanana-1.5-v-3b-instruct": Kanana15V3bInstructProfile,
20
  "polyglot-ko-5.8b-chat": PolyglotKo58bChatProfile,
lily_llm_api/models/dialogpt_medium.py DELETED
@@ -1,82 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- DialoGPT-medium λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class DialoGPTMediumProfile:
14
- """DialoGPT-medium λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "microsoft/DialoGPT-medium"
18
- self.local_path = None # μ˜¨λΌμΈμ—μ„œ λ‘œλ“œ
19
- self.display_name = "DialoGPT-medium"
20
- self.description = "μ˜μ–΄ λŒ€ν™”ν˜• λͺ¨λΈ (774M)"
21
- self.language = "en"
22
- self.model_size = "774M"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # μ˜¨λΌμΈμ—μ„œ λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.model_name)
31
- model = AutoModelForCausalLM.from_pretrained(self.model_name)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
37
- return model, tokenizer
38
-
39
- except Exception as e:
40
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
41
- raise
42
-
43
- def format_prompt(self, user_input: str) -> str:
44
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
45
- return f"User: {user_input}\nAssistant:"
46
-
47
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
48
- """응닡 μΆ”μΆœ"""
49
- if "Assistant:" in full_text:
50
- response = full_text.split("Assistant:")[-1].strip()
51
- else:
52
- if formatted_prompt in full_text:
53
- response = full_text.replace(formatted_prompt, "").strip()
54
- else:
55
- response = full_text.strip()
56
-
57
- return response
58
-
59
- def get_generation_config(self) -> Dict[str, Any]:
60
- """생성 μ„€μ •"""
61
- return {
62
- "max_new_tokens": 50,
63
- "temperature": 0.9,
64
- "do_sample": True,
65
- "top_k": 50,
66
- "top_p": 0.95,
67
- "repetition_penalty": 1.1,
68
- "no_repeat_ngram_size": 3,
69
- "pad_token_id": None,
70
- "eos_token_id": None
71
- }
72
-
73
- def get_model_info(self) -> Dict[str, Any]:
74
- """λͺ¨λΈ 정보"""
75
- return {
76
- "model_name": self.model_name,
77
- "display_name": self.display_name,
78
- "description": self.description,
79
- "language": self.language,
80
- "model_size": self.model_size,
81
- "local_path": self.local_path
82
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_1_5_2_1b_instruct.py DELETED
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana 1.5 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
- from typing import Dict, Any, Tuple
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class Kanana15V21bInstructProfile:
14
- """Kanana 1.5 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "kakaocorp/kanana-1.5-2.1b-instruct-2505"
18
- self.local_path = "./lily_llm_core/models/kanana-1.5-2.1b-instruct"
19
- self.display_name = "Kanana 1.5 2.1B Instruct"
20
- self.description = "Kakao의 Kanana 1.5 2.1B Instruct λͺ¨λΈ"
21
- self.language = ["ko", "en"]
22
- self.model_size = "2.1B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(
31
- self.local_path,
32
- trust_remote_code=True,
33
- local_files_only=True
34
- )
35
-
36
- # λͺ¨λΈ λ‘œλ“œ
37
- model = AutoModelForCausalLM.from_pretrained(
38
- self.local_path,
39
- torch_dtype=torch.float32,
40
- device_map="cpu",
41
- low_cpu_mem_usage=True,
42
- local_files_only=True
43
- )
44
-
45
- # ν† ν¬λ‚˜μ΄μ € μ„€μ •
46
- if tokenizer.pad_token is None:
47
- tokenizer.pad_token = tokenizer.eos_token
48
- if tokenizer.eos_token is None:
49
- tokenizer.eos_token = "</s>"
50
-
51
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
52
- return model, tokenizer
53
-
54
- except Exception as e:
55
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
56
- raise
57
-
58
- def format_prompt(self, user_input: str) -> str:
59
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
60
- return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- if "<|im_start|>assistant\n" in full_text:
65
- response = full_text.split("<|im_start|>assistant\n")[-1]
66
- if "<|im_end|>" in response:
67
- response = response.split("<|im_end|>")[0]
68
- return response.strip()
69
- return full_text.strip()
70
-
71
- def get_generation_config(self) -> Dict[str, Any]:
72
- """생성 μ„€μ •"""
73
- return {
74
- "max_new_tokens": 512,
75
- "temperature": 0.7,
76
- "top_p": 0.9,
77
- "do_sample": True,
78
- "repetition_penalty": 1.1,
79
- "no_repeat_ngram_size": 3,
80
- "pad_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
81
- "eos_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
82
- }
83
-
84
- def get_model_info(self) -> Dict[str, Any]:
85
- """λͺ¨λΈ 정보"""
86
- return {
87
- "model_name": self.model_name,
88
- "display_name": self.display_name,
89
- "description": self.description,
90
- "language": self.language,
91
- "model_size": self.model_size,
92
- "local_path": self.local_path
93
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -156,12 +156,13 @@ class Kanana15V3bInstructProfile:
156
 
157
  if use_local:
158
  # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
159
- logger.info("πŸ” DEBUG: 둜컬 λͺ¨λΈ λ‘œλ“œ μ‹œλ„")
 
160
  model = KananaVForConditionalGeneration.from_pretrained(
161
  model_path,
162
  token=HF_TOKEN,
163
  trust_remote_code=True,
164
- torch_dtype=torch.bfloat16,
165
  local_files_only=True,
166
  # low_cpu_mem_usage=True,
167
  ).to(DEVICE)
@@ -176,10 +177,12 @@ class Kanana15V3bInstructProfile:
176
  raise
177
 
178
  logger.info("πŸ” DEBUG: KananaVForConditionalGeneration.from_pretrained 호좜")
 
 
179
  model = KananaVForConditionalGeneration.from_pretrained(
180
  model_path,
181
  token=HF_TOKEN,
182
- torch_dtype=torch.float16,
183
  trust_remote_code=True,
184
  cache_dir="/app/cache/transformers",
185
  # device_map="auto",
 
156
 
157
  if use_local:
158
  # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
159
+ logger.info("πŸ” DEBUG: 둜컬 λͺ¨λΈ λ‘œλ“œ μ‹œλ„")
160
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
161
  model = KananaVForConditionalGeneration.from_pretrained(
162
  model_path,
163
  token=HF_TOKEN,
164
  trust_remote_code=True,
165
+ torch_dtype=selected_dtype,
166
  local_files_only=True,
167
  # low_cpu_mem_usage=True,
168
  ).to(DEVICE)
 
177
  raise
178
 
179
  logger.info("πŸ” DEBUG: KananaVForConditionalGeneration.from_pretrained 호좜")
180
+ # CPU ν™˜κ²½μ—μ„œ float16/bfloat16보닀 float32κ°€ 더 μ•ˆμ •μ μΈ κ²½μš°κ°€ 많음
181
+ selected_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
182
  model = KananaVForConditionalGeneration.from_pretrained(
183
  model_path,
184
  token=HF_TOKEN,
185
+ torch_dtype=selected_dtype,
186
  trust_remote_code=True,
187
  cache_dir="/app/cache/transformers",
188
  # device_map="auto",
lily_llm_api/models/kanana_1_5_v_3b_instruct_250809_0055.py DELETED
@@ -1,256 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana-1.5-v-3b-instruct λͺ¨λΈ ν”„λ‘œν•„ (λ‹¨μˆœ λ‘œλ”© μ΅œμ’…λ³Έ)
4
- """
5
- import sys
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- import logging
9
- from transformers import AutoTokenizer
10
- import os
11
- from dotenv import load_dotenv
12
- load_dotenv()
13
-
14
- HF_TOKEN = os.getenv("HF_TOKEN")
15
-
16
- logger = logging.getLogger(__name__)
17
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
-
19
- max_new_tokens = 64
20
-
21
- class Kanana15V3bInstructProfile:
22
- """Kanana-1.5-v-3b-instruct λͺ¨λΈ ν”„λ‘œν•„"""
23
-
24
- def __init__(self):
25
- # ν™˜κ²½ 감지
26
- self.is_local = self._detect_local_environment()
27
-
28
- # λͺ¨λΈ 경둜 μ„€μ •
29
- if self.is_local:
30
- self.model_name = "gbrabbit/lily-math-model" # λ‘œμ»¬μ—μ„œλ„ HF λͺ¨λΈλͺ… μ‚¬μš©
31
- self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
- self.display_name = "Kanana-1.5-v-3b-instruct (둜컬)"
33
- else:
34
- self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub λͺ¨λΈ 경둜
35
- self.local_path = None # μ„œλ²„μ—μ„œλŠ” 둜컬 경둜 μ‚¬μš© μ•ˆν•¨
36
- self.display_name = "Kanana-1.5-v-3b-instruct (μ„œλ²„)"
37
-
38
- self.description = "카카였 λ©€ν‹°λͺ¨λ‹¬ λͺ¨λΈ (3.6B) - Math RAG νŠΉν™”"
39
- self.language = "ko"
40
- self.model_size = "3.6B"
41
- self.multimodal = True
42
-
43
- def _detect_local_environment(self) -> bool:
44
- """둜컬 ν™˜κ²½μΈμ§€ 감지"""
45
- import os
46
-
47
- # 둜컬 ν™˜κ²½ 감지 쑰건듀
48
- local_indicators = [
49
- os.path.exists('.env'),
50
- os.path.exists('../.env'),
51
- os.path.exists('../../.env'),
52
- os.getenv('IS_LOCAL') == 'true',
53
- os.getenv('ENVIRONMENT') == 'local',
54
- os.getenv('DOCKER_ENV') == 'local',
55
- # Windows 경둜 확인
56
- os.path.exists('C:/Project/lily_generate_project/lily_generate_package/.env'),
57
- ]
58
-
59
- is_local = any(local_indicators)
60
- logger.info(f"πŸ” ν™˜κ²½ 감지: {'둜컬' if is_local else 'μ„œλ²„'}")
61
- return is_local
62
-
63
- def _load_environment_variables(self):
64
- """ν™˜κ²½λ³€μˆ˜λ₯Ό λ‘œλ“œν•©λ‹ˆλ‹€."""
65
- import os
66
-
67
- try:
68
- if self.is_local:
69
- # 둜컬 ν™˜κ²½: .env 파일 λ‘œλ“œ
70
- from dotenv import load_dotenv
71
-
72
- # μ—¬λŸ¬ κ²½λ‘œμ—μ„œ .env 파일 μ°ΎκΈ°
73
- env_paths = [
74
- '.env',
75
- '../.env',
76
- '../../.env',
77
- 'C:/Project/lily_generate_project/lily_generate_package/.env',
78
- ]
79
-
80
- env_loaded = False
81
- for env_path in env_paths:
82
- if os.path.exists(env_path):
83
- load_dotenv(env_path)
84
- logger.info(f"βœ… ν™˜κ²½λ³€μˆ˜ λ‘œλ“œλ¨: {env_path}")
85
- env_loaded = True
86
- break
87
-
88
- if not env_loaded:
89
- logger.warning("⚠️ .env νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€")
90
- else:
91
- # μ„œλ²„ ν™˜κ²½: μ‹œμŠ€ν…œ ν™˜κ²½λ³€μˆ˜ μ‚¬μš©
92
- logger.info("🌐 μ„œλ²„ ν™˜κ²½λ³€μˆ˜ μ‚¬μš©")
93
-
94
- except ImportError:
95
- logger.warning("⚠️ python-dotenvκ°€ μ„€μΉ˜λ˜μ§€ μ•ŠμŒ")
96
- except Exception as e:
97
- logger.error(f"❌ ν™˜κ²½λ³€μˆ˜ λ‘œλ“œ μ‹€νŒ¨: {e}")
98
-
99
- def load_model(self) -> Tuple[Any, Any]:
100
- """ν™˜κ²½μ— 따라 λͺ¨λΈμ„ λ‘œλ“œν•©λ‹ˆλ‹€."""
101
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
102
-
103
- import os
104
- from pathlib import Path
105
-
106
- # ν™˜κ²½λ³€μˆ˜ λ‘œλ”©
107
- self._load_environment_variables()
108
-
109
- try:
110
- # 1. 둜컬 μΊμ‹œ κ²½λ‘œκ°€ μžˆλŠ”μ§€ 확인
111
- use_local = False
112
- if self.local_path is not None:
113
- local_model_path = Path(self.local_path)
114
- use_local = local_model_path.exists() and any(local_model_path.iterdir())
115
-
116
- if use_local:
117
- logger.info(f"πŸ—‚οΈ 둜컬 λͺ¨λΈ μ‚¬μš©: {self.local_path}")
118
- model_path = self.local_path
119
- local_files_only = True
120
-
121
- # 둜컬 λͺ¨λΈμ˜ 경우 sys.path에 μΆ”κ°€
122
- if self.local_path not in sys.path:
123
- sys.path.insert(0, self.local_path)
124
- else:
125
- logger.info(f"🌐 Hugging Face Hubμ—μ„œ λ‹€μš΄λ‘œλ“œ: {self.model_name}")
126
- model_path = self.model_name
127
- local_files_only = False
128
-
129
- # ν™˜κ²½λ³„ μΆ”κ°€ μ„€μ •
130
- if self.is_local:
131
- logger.info("🏠 둜컬 ν™˜κ²½ μ„€μ • 적용")
132
- # 둜컬 ν™˜κ²½μ—μ„œλŠ” μΆ”κ°€ 섀정이 ν•„μš”ν•  수 있음
133
- else:
134
- logger.info("☁️ μ„œλ²„ ν™˜κ²½ μ„€μ • 적용")
135
- # μ„œλ²„ ν™˜κ²½μ—μ„œλŠ” μΊμ‹œ 디렉토리 λ“± μ„€μ •
136
-
137
- # 2. ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
138
- tokenizer = AutoTokenizer.from_pretrained(
139
- model_path,
140
- token=HF_TOKEN,
141
- trust_remote_code=True,
142
- local_files_only=local_files_only,
143
- cache_dir="/app/cache/transformers" if not use_local else None
144
- )
145
- logger.info(f"βœ… ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ μ™„λ£Œ ({tokenizer.__class__.__name__})")
146
- from modeling import KananaVForConditionalGeneration
147
- # 3. λͺ¨λΈ λ‘œλ“œ
148
- if use_local:
149
- # 둜컬 λͺ¨λΈ: μ»€μŠ€ν…€ λͺ¨λΈλ§ 클래슀 μ‚¬μš©
150
- model = KananaVForConditionalGeneration.from_pretrained(
151
- model_path,
152
- token=HF_TOKEN,
153
- trust_remote_code=True,
154
- torch_dtype=torch.bfloat16,
155
- local_files_only=True,
156
- # low_cpu_mem_usage=True,
157
- ).to(DEVICE)
158
- else:
159
- model = KananaVForConditionalGeneration.from_pretrained(
160
- model_path,
161
- token=HF_TOKEN,
162
- torch_dtype=torch.float16,
163
- trust_remote_code=True,
164
- cache_dir="/app/cache/transformers",
165
- # device_map="auto",
166
- # low_cpu_mem_usage=True,
167
- ).to(DEVICE)
168
-
169
- logger.info(f"βœ… λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ ({model.__class__.__name__})")
170
- return model, tokenizer
171
-
172
- except Exception as e:
173
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}", exc_info=True)
174
- if use_local and self.local_path in sys.path:
175
- sys.path.remove(self.local_path)
176
- raise
177
-
178
- def get_generation_config(self) -> Dict[str, Any]:
179
- # λͺ¨λΈ νŒŒλΌλ―Έν„° μ΅œμ ν™” μ„€μ •, max_new_tokens : μƒμ„±λ˜λŠ” ν…μŠ€νŠΈ 길이 μ΅œλŒ€κ°’ (이미지 μ„€λͺ…을 μœ„ν•΄ 증가)
180
- return {"max_new_tokens": max_new_tokens, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}
181
-
182
- def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
183
- """
184
- λ‹€μ–‘ν•œ 응닡 ν˜•μ‹μ„ μ²˜λ¦¬ν•  수 μžˆλŠ” 더 λ˜‘λ˜‘ν•œ 응닡 μΆ”μΆœ ν•¨μˆ˜
185
- """
186
- logger.info(f"--- 응닡 μΆ”μΆœ μ‹œμž‘ ---")
187
- logger.info(f"전체 생성 ν…μŠ€νŠΈ (Raw): \n---\n{full_text}\n---")
188
-
189
- # ν”„λ‘¬ν”„νŠΈκ°€ 제곡된 경우 이λ₯Ό 제거
190
- if formatted_prompt and formatted_prompt in full_text:
191
- response = full_text.replace(formatted_prompt, "").strip()
192
- logger.info(f"βœ… 성곡: ν”„λ‘¬ν”„νŠΈ 제거둜 응닡 μΆ”μΆœ")
193
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
194
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
195
- return response
196
-
197
- # 1μˆœμœ„: κ°€μž₯ μ •ν™•ν•œ 특수 νƒœκ·Έλ‘œ μΆ”μΆœ μ‹œλ„
198
- # 예: <|start_header_id|>assistant<|end_header_id|>μ•ˆλ…•ν•˜μ„Έμš”...
199
- # λ˜λŠ” <|im_start|>assistantμ•ˆλ…•ν•˜μ„Έμš”...
200
- assistant_tags = [
201
- "<|start_header_id|>assistant<|end_header_id|>",
202
- "<|im_start|>assistant",
203
- "assistant\n",
204
- "assistant:"
205
- ]
206
- for tag in assistant_tags:
207
- if tag in full_text:
208
- parts = full_text.split(tag)
209
- if len(parts) > 1:
210
- response = parts[-1].strip()
211
- # μΆ”κ°€ 정리: 특수 토큰 제거
212
- response = response.replace("<|im_end|>", "").strip()
213
- logger.info(f"βœ… 성곡: '{tag}' νƒœκ·Έλ‘œ 응닡 μΆ”μΆœ")
214
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
215
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
216
- return response
217
-
218
- # 2μˆœμœ„: κ°„λ‹¨ν•œ ν‚€μ›Œλ“œλ‘œ μΆ”μΆœ μ‹œλ„
219
- # 예: ... user μ•ˆλ…•ν•˜μ„Έμš” assistant μ•ˆλ…•ν•˜μ„Έμš” ...
220
- if "assistant" in full_text:
221
- parts = full_text.split("assistant")
222
- if len(parts) > 1:
223
- response = parts[-1].strip()
224
- response = response.replace("<|im_end|>", "").strip()
225
- logger.info("βœ… 성곡: 'assistant' ν‚€μ›Œλ“œλ‘œ 응닡 μΆ”μΆœ")
226
- logger.info(f"μΆ”μΆœλœ 응닡: {response}")
227
- if response: # 빈 λ¬Έμžμ—΄μ΄ μ•„λ‹Œ κ²½μš°μ—λ§Œ λ°˜ν™˜
228
- return response
229
-
230
- # 3μˆœμœ„: ν”„λ‘¬ν”„νŠΈκ°€ μ—†λŠ” 경우, 전체 ν…μŠ€νŠΈμ—μ„œ λΆˆν•„μš”ν•œ λΆ€λΆ„ 제거
231
- clean_text = full_text.strip()
232
- # 일반적인 ν”„λ‘¬ν”„νŠΈ νŒ¨ν„΄ 제거 μ‹œλ„
233
- patterns_to_remove = [
234
- "<|im_start|>user\n",
235
- "<|im_end|>",
236
- "<image>",
237
- "user\n",
238
- "assistant\n"
239
- ]
240
-
241
- for pattern in patterns_to_remove:
242
- clean_text = clean_text.replace(pattern, "")
243
-
244
- clean_text = clean_text.strip()
245
-
246
- if clean_text and clean_text != full_text:
247
- logger.info("βœ… 성곡: νŒ¨ν„΄ 제거둜 응닡 정리")
248
- logger.info(f"μ •λ¦¬λœ 응닡: {clean_text}")
249
- return clean_text
250
-
251
- logger.warning("⚠️ κ²½κ³ : μ‘λ‹΅μ—μ„œ assistant 뢀뢄을 μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. 전체 ν…μŠ€νŠΈλ₯Ό λ°˜ν™˜ν•©λ‹ˆλ‹€.")
252
- logger.info(f"μ΅œμ’… λ°˜ν™˜ ν…μŠ€νŠΈ: {full_text}")
253
- return full_text
254
-
255
- def get_model_info(self) -> Dict[str, Any]:
256
- return {"model_name": self.model_name, "display_name": self.display_name, "description": self.description, "language": self.language, "model_size": self.model_size, "local_path": self.local_path, "multimodal": self.multimodal}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/kanana_nano_2_1b_instruct.py DELETED
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Kanana Nano 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- import torch
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
- from typing import Dict, Any, Tuple
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class KananaNano21bInstructProfile:
14
- """Kanana Nano 2.1B Instruct λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "kakaocorp/kanana-nano-2.1b-instruct"
18
- self.local_path = "./lily_llm_core/models/kanana-nano-2.1b-instruct"
19
- self.display_name = "Kanana Nano 2.1B Instruct"
20
- self.description = "Kakao의 Kanana Nano 2.1B Instruct λͺ¨λΈ (κ°€μž₯ μž‘μ€ λͺ¨λΈ)"
21
- self.language = ["ko", "en"]
22
- self.model_size = "2.1B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(
31
- self.local_path,
32
- trust_remote_code=True,
33
- local_files_only=True
34
- )
35
-
36
- # λͺ¨λΈ λ‘œλ“œ
37
- model = AutoModelForCausalLM.from_pretrained(
38
- self.local_path,
39
- torch_dtype=torch.float32,
40
- device_map="cpu",
41
- low_cpu_mem_usage=True,
42
- local_files_only=True
43
- )
44
-
45
- # ν† ν¬λ‚˜μ΄μ € μ„€μ •
46
- if tokenizer.pad_token is None:
47
- tokenizer.pad_token = tokenizer.eos_token
48
- if tokenizer.eos_token is None:
49
- tokenizer.eos_token = "</s>"
50
-
51
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
52
- return model, tokenizer
53
-
54
- except Exception as e:
55
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
56
- raise
57
-
58
- def format_prompt(self, user_input: str) -> str:
59
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
60
- return f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- if "<|im_start|>assistant\n" in full_text:
65
- response = full_text.split("<|im_start|>assistant\n")[-1]
66
- if "<|im_end|>" in response:
67
- response = response.split("<|im_end|>")[0]
68
- return response.strip()
69
- return full_text.strip()
70
-
71
- def get_generation_config(self) -> Dict[str, Any]:
72
- """생성 μ„€μ •"""
73
- return {
74
- "max_new_tokens": 128, # 512μ—μ„œ 128둜 μ€„μž„
75
- "temperature": 0.7,
76
- "top_p": 0.9,
77
- "do_sample": True,
78
- "repetition_penalty": 1.1,
79
- "no_repeat_ngram_size": 3,
80
- "pad_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
81
- "eos_token_id": None, # ν† ν¬λ‚˜μ΄μ €μ—μ„œ 섀정됨
82
- "use_cache": True, # μΊμ‹œ μ‚¬μš©
83
- "return_dict_in_generate": False, # λ©”λͺ¨λ¦¬ μ ˆμ•½
84
- }
85
-
86
- def get_model_info(self) -> Dict[str, Any]:
87
- """λͺ¨λΈ 정보"""
88
- return {
89
- "model_name": self.model_name,
90
- "display_name": self.display_name,
91
- "description": self.description,
92
- "language": self.language,
93
- "model_size": self.model_size,
94
- "local_path": self.local_path
95
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/mistral_7b_instruct.py DELETED
@@ -1,103 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Mistral-7B-Instruct-v0.2 λͺ¨λΈ ν”„λ‘œν•„
4
- mistralai/Mistral-7B-Instruct-v0.2 λͺ¨λΈμš©
5
- """
6
-
7
- from typing import Dict, Any, Tuple
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import logging
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- class Mistral7bInstructProfile:
15
- """Mistral-7B-Instruct-v0.2 λͺ¨λΈ ν”„λ‘œν•„"""
16
-
17
- def __init__(self):
18
- self.model_name = "mistralai/Mistral-7B-Instruct-v0.2"
19
- self.local_path = "./lily_llm_core/models/mistral-7B-Instruct-v0.2"
20
- self.display_name = "Mistral-7B-Instruct-v0.2"
21
- self.description = "Mistral AI의 7B νŒŒλΌλ―Έν„° 인슀트럭트 λͺ¨λΈ"
22
- self.language = "en"
23
- self.model_size = "7B"
24
-
25
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
- try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- trust_remote_code=True,
39
- local_files_only=True,
40
- torch_dtype=torch.bfloat16,
41
- # device_map="cpu",
42
- # low_cpu_mem_usage=True
43
- # max_memory={"cpu": "8GB"}
44
- )
45
-
46
- # λͺ¨λΈμ„ CPU둜 λͺ…μ‹œμ  이동
47
- model.to('cpu')
48
-
49
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
50
- return model, tokenizer
51
-
52
- except Exception as e:
53
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
54
- raise
55
-
56
- def format_prompt(self, user_input: str) -> str:
57
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ… - Mistral 인슀트럭트 ν˜•μ‹"""
58
- # Mistral-7B-Instruct-v0.2 λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
59
- prompt = f"""<s>[INST] {user_input} [/INST]"""
60
- return prompt
61
-
62
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
63
- """응닡 μΆ”μΆœ"""
64
- # Mistral λͺ¨λΈμ˜ 응닡 μΆ”μΆœ
65
- if "[/INST]" in full_text:
66
- response = full_text.split("[/INST]")[-1].strip()
67
- else:
68
- # ν”„λ‘¬ν”„νŠΈ 제거
69
- if formatted_prompt in full_text:
70
- response = full_text.replace(formatted_prompt, "").strip()
71
- else:
72
- response = full_text.strip()
73
-
74
- # 빈 μ‘λ‹΅μ΄λ‚˜ μ΄μƒν•œ 문자만 μžˆλŠ” 경우 처리
75
- if not response or len(response.strip()) < 2:
76
- return "Hello! How can I help you today?"
77
-
78
- return response
79
-
80
- def get_generation_config(self) -> Dict[str, Any]:
81
- """생성 μ„€μ •"""
82
- return {
83
- "max_new_tokens": 128,
84
- "temperature": 0.7,
85
- "do_sample": True,
86
- "top_k": 50,
87
- "top_p": 0.9,
88
- "repetition_penalty": 1.1,
89
- "no_repeat_ngram_size": 3,
90
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
91
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
92
- }
93
-
94
- def get_model_info(self) -> Dict[str, Any]:
95
- """λͺ¨λΈ 정보"""
96
- return {
97
- "model_name": self.model_name,
98
- "display_name": self.display_name,
99
- "description": self.description,
100
- "language": self.language,
101
- "model_size": self.model_size,
102
- "local_path": self.local_path
103
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_1_3b.py DELETED
@@ -1,102 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Polyglot-ko-1.3b λͺ¨λΈ ν”„λ‘œν•„
4
- """
5
-
6
- from typing import Dict, Any, Tuple
7
- import torch
8
- from transformers import AutoTokenizer, AutoModelForCausalLM
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- class PolyglotKo13bProfile:
14
- """Polyglot-ko-1.3b λͺ¨λΈ ν”„λ‘œν•„"""
15
-
16
- def __init__(self):
17
- self.model_name = "EleutherAI/polyglot-ko-1.3b"
18
- self.local_path = "./lily_llm_core/models/polyglot-ko-1.3b"
19
- self.display_name = "Polyglot-ko-1.3b"
20
- self.description = "ν•œκ΅­μ–΄ μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
21
- self.language = "ko"
22
- self.model_size = "1.3B"
23
-
24
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
- try:
29
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
31
-
32
- if tokenizer.pad_token is None:
33
- tokenizer.pad_token = tokenizer.eos_token
34
-
35
- model = AutoModelForCausalLM.from_pretrained(
36
- self.local_path,
37
- torch_dtype=torch.bfloat16,
38
- device_map="cpu",
39
- # low_cpu_mem_usage=True,
40
- trust_remote_code=True,
41
- local_files_only=True,
42
- )
43
-
44
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
45
- return model, tokenizer
46
-
47
- except Exception as e:
48
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
49
- raise
50
-
51
- def format_prompt(self, user_input: str) -> str:
52
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ…"""
53
- # 더 μžμ—°μŠ€λŸ¬μš΄ ν•œκ΅­μ–΄ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
54
- prompt = f"""λ‹€μŒ μ§ˆλ¬Έμ— λŒ€ν•΄ μΉœμ ˆν•˜κ³  μžμ„Ένžˆ λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
55
-
56
- 질문: {user_input}
57
-
58
- λ‹΅λ³€:"""
59
- return prompt
60
-
61
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
62
- """응닡 μΆ”μΆœ"""
63
- # "λ‹΅λ³€:" μ΄ν›„μ˜ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœ
64
- if "λ‹΅λ³€:" in full_text:
65
- response = full_text.split("λ‹΅λ³€:")[-1].strip()
66
- else:
67
- # ν”„λ‘¬ν”„νŠΈ 제거
68
- if formatted_prompt in full_text:
69
- response = full_text.replace(formatted_prompt, "").strip()
70
- else:
71
- response = full_text.strip()
72
-
73
- # 빈 μ‘λ‹΅μ΄λ‚˜ μ΄μƒν•œ 문자만 μžˆλŠ” 경우 처리
74
- if not response or len(response.strip()) < 2:
75
- return "μ•ˆλ…•ν•˜μ„Έμš”! 무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?"
76
-
77
- return response
78
-
79
- def get_generation_config(self) -> Dict[str, Any]:
80
- """생성 μ„€μ •"""
81
- return {
82
- "max_new_tokens": 128,
83
- "temperature": 0.7,
84
- "do_sample": True,
85
- "top_k": 50,
86
- "top_p": 0.9,
87
- "repetition_penalty": 1.1,
88
- "no_repeat_ngram_size": 3,
89
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
90
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
91
- }
92
-
93
- def get_model_info(self) -> Dict[str, Any]:
94
- """λͺ¨λΈ 정보"""
95
- return {
96
- "model_name": self.model_name,
97
- "display_name": self.display_name,
98
- "description": self.description,
99
- "language": self.language,
100
- "model_size": self.model_size,
101
- "local_path": self.local_path
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED
@@ -8,6 +8,8 @@ from typing import Dict, Any, Tuple
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
 
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
@@ -16,38 +18,43 @@ class PolyglotKo13bChatProfile:
16
 
17
  def __init__(self):
18
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
19
- self.local_path = "./lily_llm_core/models/polyglot-ko-1.3b-chat"
20
  self.display_name = "Polyglot-ko-1.3b-chat"
21
  self.description = "ν•œκ΅­μ–΄ μ±„νŒ… μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
22
  self.language = "ko"
23
  self.model_size = "1.3B"
24
 
25
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
  try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
 
 
 
 
 
 
 
 
33
  if tokenizer.pad_token is None:
34
  tokenizer.pad_token = tokenizer.eos_token
35
-
 
 
 
 
36
  model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- # torch_dtype=torch.float32,
39
- device_map="cpu",
40
- # low_cpu_mem_usage=True
41
  trust_remote_code=True,
42
- torch_dtype=torch.bfloat16,
43
- local_files_only=True,
44
- )
45
-
46
- # model.to('cpu')
47
-
48
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
49
  return model, tokenizer
50
-
51
  except Exception as e:
52
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
53
  raise
@@ -100,5 +107,6 @@ class PolyglotKo13bChatProfile:
100
  "description": self.description,
101
  "language": self.language,
102
  "model_size": self.model_size,
103
- "local_path": self.local_path
 
104
  }
 
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
11
+ import os
12
+ from pathlib import Path
13
 
14
  logger = logging.getLogger(__name__)
15
 
 
18
 
19
  def __init__(self):
20
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
21
+ self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
22
  self.display_name = "Polyglot-ko-1.3b-chat"
23
  self.description = "ν•œκ΅­μ–΄ μ±„νŒ… μ „μš© κ²½λŸ‰ λͺ¨λΈ (1.3B)"
24
  self.language = "ko"
25
  self.model_size = "1.3B"
26
 
27
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
28
+ """λͺ¨λΈ λ‘œλ“œ (둜컬 μš°μ„ , μ—†μœΌλ©΄ Hub)"""
29
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
 
30
  try:
31
+ use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
32
+ model_path = self.local_path if use_local else self.model_name
33
+
34
+ logger.info(f"πŸ” λͺ¨λΈ 경둜: {model_path} (local={'yes' if use_local else 'no'})")
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained(
37
+ model_path,
38
+ use_fast=True,
39
+ trust_remote_code=True,
40
+ local_files_only=use_local,
41
+ )
42
  if tokenizer.pad_token is None:
43
  tokenizer.pad_token = tokenizer.eos_token
44
+
45
+ # CPUμ—μ„œλŠ” float32κ°€ 더 μ•ˆμ •μ , CUDAμ—μ„œλŠ” float16 μ‚¬μš©
46
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
47
+ selected_dtype = torch.float16 if device == 'cuda' else torch.float32
48
+
49
  model = AutoModelForCausalLM.from_pretrained(
50
+ model_path,
 
 
 
51
  trust_remote_code=True,
52
+ torch_dtype=selected_dtype,
53
+ local_files_only=use_local,
54
+ ).to(device)
55
+
56
+ logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡! (device={device}, dtype={selected_dtype})")
 
 
57
  return model, tokenizer
 
58
  except Exception as e:
59
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
60
  raise
 
107
  "description": self.description,
108
  "language": self.language,
109
  "model_size": self.model_size,
110
+ "local_path": self.local_path,
111
+ "multimodal": False,
112
  }
lily_llm_api/models/polyglot_ko_5_8b.py DELETED
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- """
4
- KoAlpaca-Polyglot-5.8B λͺ¨λΈ λ‹€μš΄λ‘œλ“œ
5
- """
6
-
7
- from typing import Dict, Any, Tuple
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import logging
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- class PolyglotKo58bProfile:
15
- """KoAlpaca-Polyglot-5.8B λͺ¨λΈ ν”„λ‘œν•„"""
16
-
17
- def __init__(self):
18
- self.model_name = "beomi/KoAlpaca-Polyglot-5.8B"
19
- self.local_path = "./lily_llm_core/models/koalpaca-polyglot-5.8b"
20
- self.display_name = "KoAlpaca-Polyglot-5.8B"
21
- self.description = "EleutherAI/polyglot-ko-5.8b의 λ―Έμ„Έ μ‘°μ •λœ 버전"
22
- self.language = "ko"
23
- self.model_size = "5.8B"
24
-
25
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
26
- """λͺ¨λΈ λ‘œλ“œ"""
27
- logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
28
-
29
- try:
30
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
31
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
32
-
33
- if tokenizer.pad_token is None:
34
- tokenizer.pad_token = tokenizer.eos_token
35
-
36
- model = AutoModelForCausalLM.from_pretrained(
37
- self.local_path,
38
- torch_dtype=torch.bfloat16,
39
- # torch_dtype=torch.float32,
40
- device_map="cpu",
41
- # low_cpu_mem_usage=True,
42
- trust_remote_code=True,
43
- local_files_only=True,
44
- )
45
-
46
- # model.to('cpu')
47
-
48
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
49
- return model, tokenizer
50
-
51
- except Exception as e:
52
- logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
53
- raise
54
-
55
- def format_prompt(self, user_input: str) -> str:
56
- """ν”„λ‘¬ν”„νŠΈ ν¬λ§·νŒ… - μ±„νŒ… ν˜•μ‹"""
57
- # heegyu/polyglot-ko-1.3b-chat λͺ¨λΈμ˜ ꢌμž₯ ν”„λ‘¬ν”„νŠΈ ν˜•μ‹
58
- prompt = f"""당신은 AI μ±—λ΄‡μž…λ‹ˆλ‹€. μ‚¬μš©μžμ—κ²Œ 도움이 되고 μœ μ΅ν•œ λ‚΄μš©μ„ μ œκ³΅ν•΄μ•Όν•©λ‹ˆλ‹€. 닡변은 κΈΈκ³  μžμ„Έν•˜λ©° μΉœμ ˆν•œ μ„€λͺ…을 λ§λΆ™μ—¬μ„œ μž‘μ„±ν•˜μ„Έμš”.
59
-
60
- ### μ‚¬μš©μž:
61
- {user_input}
62
-
63
- ### 챗봇:
64
- """
65
- return prompt
66
-
67
- def extract_response(self, full_text: str, formatted_prompt: str) -> str:
68
- """응닡 μΆ”μΆœ"""
69
- # "### 챗봇:" μ΄ν›„μ˜ ν…μŠ€νŠΈλ₯Ό μΆ”μΆœ
70
- if "### 챗봇:" in full_text:
71
- response = full_text.split("### 챗봇:")[-1].strip()
72
- else:
73
- # ν”„λ‘¬ν”„νŠΈ 제거
74
- if formatted_prompt in full_text:
75
- response = full_text.replace(formatted_prompt, "").strip()
76
- else:
77
- response = full_text.strip()
78
-
79
- return response
80
-
81
- def get_generation_config(self) -> Dict[str, Any]:
82
- """생성 μ„€μ •"""
83
- return {
84
- "max_new_tokens": 128,
85
- "temperature": 0.7,
86
- "do_sample": True,
87
- "top_k": 50,
88
- "top_p": 0.9,
89
- "repetition_penalty": 1.1,
90
- "no_repeat_ngram_size": 3,
91
- "pad_token_id": None, # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
92
- "eos_token_id": None # λͺ¨λΈμ—μ„œ μžλ™ μ„€μ •
93
- }
94
-
95
- def get_model_info(self) -> Dict[str, Any]:
96
- """λͺ¨λΈ 정보"""
97
- return {
98
- "model_name": self.model_name,
99
- "display_name": self.display_name,
100
- "description": self.description,
101
- "language": self.language,
102
- "model_size": self.model_size,
103
- "local_path": self.local_path
104
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED
@@ -15,35 +15,43 @@ class PolyglotKo58bChatProfile:
15
 
16
  def __init__(self):
17
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
18
- self.local_path = "./lily_llm_core/models/polyglot-ko-5.8b-chat"
19
  self.display_name = "heegyu/polyglot-ko-5.8b-chat"
20
  self.description = "EleutherAI/polyglot-ko-5.8bλ₯Ό μ—¬λŸ¬ ν•œκ΅­μ–΄ instruction λ°μ΄ν„°μ…‹μœΌλ‘œ ν•™μŠ΅ν•œ λͺ¨λΈ"
21
  self.language = "ko"
22
  self.model_size = "5.8B"
23
 
24
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
- """λͺ¨λΈ λ‘œλ“œ"""
26
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
27
-
28
  try:
29
- # 둜컬 λͺ¨λΈ λ‘œλ“œ
30
- tokenizer = AutoTokenizer.from_pretrained(self.local_path, use_fast=True)
31
-
 
 
 
 
 
 
 
 
 
32
  if tokenizer.pad_token is None:
33
  tokenizer.pad_token = tokenizer.eos_token
34
-
 
 
 
35
  model = AutoModelForCausalLM.from_pretrained(
36
- self.local_path,
37
- torch_dtype=torch.bfloat16,
38
- device_map="cpu",
39
- # low_cpu_mem_usage=True,
40
  trust_remote_code=True,
41
- local_files_only=True,
42
- )
43
-
44
- logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡!")
 
45
  return model, tokenizer
46
-
47
  except Exception as e:
48
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
49
  raise
@@ -84,5 +92,6 @@ class PolyglotKo58bChatProfile:
84
  "description": self.description,
85
  "language": self.language,
86
  "model_size": self.model_size,
87
- "local_path": self.local_path
 
88
  }
 
15
 
16
  def __init__(self):
17
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
18
+ self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
19
  self.display_name = "heegyu/polyglot-ko-5.8b-chat"
20
  self.description = "EleutherAI/polyglot-ko-5.8bλ₯Ό μ—¬λŸ¬ ν•œκ΅­μ–΄ instruction λ°μ΄ν„°μ…‹μœΌλ‘œ ν•™μŠ΅ν•œ λͺ¨λΈ"
21
  self.language = "ko"
22
  self.model_size = "5.8B"
23
 
24
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
25
+ """λͺ¨λΈ λ‘œλ“œ (둜컬 μš°μ„ , μ—†μœΌλ©΄ Hub)"""
26
  logger.info(f"πŸ“₯ {self.display_name} λͺ¨λΈ λ‘œλ“œ 쀑...")
 
27
  try:
28
+ from pathlib import Path
29
+ use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
30
+ model_path = self.local_path if use_local else self.model_name
31
+
32
+ logger.info(f"πŸ” λͺ¨λΈ 경둜: {model_path} (local={'yes' if use_local else 'no'})")
33
+
34
+ tokenizer = AutoTokenizer.from_pretrained(
35
+ model_path,
36
+ use_fast=True,
37
+ trust_remote_code=True,
38
+ local_files_only=use_local,
39
+ )
40
  if tokenizer.pad_token is None:
41
  tokenizer.pad_token = tokenizer.eos_token
42
+
43
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
44
+ selected_dtype = torch.float16 if device == 'cuda' else torch.float32
45
+
46
  model = AutoModelForCausalLM.from_pretrained(
47
+ model_path,
 
 
 
48
  trust_remote_code=True,
49
+ torch_dtype=selected_dtype,
50
+ local_files_only=use_local,
51
+ ).to(device)
52
+
53
+ logger.info(f"βœ… {self.display_name} λͺ¨λΈ λ‘œλ“œ 성곡! (device={device}, dtype={selected_dtype})")
54
  return model, tokenizer
 
55
  except Exception as e:
56
  logger.error(f"❌ {self.display_name} λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
57
  raise
 
92
  "description": self.description,
93
  "language": self.language,
94
  "model_size": self.model_size,
95
+ "local_path": self.local_path,
96
+ "multimodal": False,
97
  }
lily_llm_core/config.py CHANGED
@@ -36,8 +36,8 @@ class ModelSettings(BaseSettings):
36
 
37
  # λͺ¨λΈλ³„ μ„€μ •
38
  kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b λͺ¨λΈ 경둜")
39
- polyglot_ko_1_3b_chat_model_path: str = Field(default="./models/polyglot-ko-1.3b-chat", description="Polyglot 1.3b λͺ¨λΈ 경둜")
40
- polyglot_ko_5_8b_chat_model_path: str = Field(default="./models/polyglot-ko-5.8b-chat", description="Polyglot 5.8b λͺ¨λΈ 경둜")
41
 
42
  class Config:
43
  env_prefix = "MODEL_"
 
36
 
37
  # λͺ¨λΈλ³„ μ„€μ •
38
  kanana_1_5_v_3b_instruct_model_path: str = Field(default="./models/kanana_1_5_v_3b_instruct", description="Kanana 1.5 v 3b λͺ¨λΈ 경둜")
39
+ polyglot_ko_1_3b_chat_model_path: str = Field(default="./models/polyglot_ko_1_3b_chat", description="Polyglot 1.3b λͺ¨λΈ 경둜")
40
+ polyglot_ko_5_8b_chat_model_path: str = Field(default="./models/polyglot_ko_5_8b_chat", description="Polyglot 5.8b λͺ¨λΈ 경둜")
41
 
42
  class Config:
43
  env_prefix = "MODEL_"
test.py CHANGED
@@ -1,60 +1,82 @@
 
 
 
 
 
1
  import requests
2
  import json
3
- import os # os λͺ¨λ“ˆ μΆ”κ°€
4
- from dotenv import load_dotenv
5
- load_dotenv()
6
-
7
- # 1. ν™˜κ²½ λ³€μˆ˜μ—μ„œ ν—ˆκΉ…νŽ˜μ΄μŠ€ 토큰을 κ°€μ Έμ˜΅λ‹ˆλ‹€.
8
- # ν„°λ―Έλ„μ—μ„œ `set HUGGING_FACE_TOKEN=hf_...` (Windows) λ˜λŠ”
9
- # `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux) λͺ…λ ΉμœΌλ‘œ 미리 μ„€μ •ν•©λ‹ˆλ‹€.
10
- HF_TOKEN = os.getenv("HF_TOKEN")
11
 
12
- # ν—ˆκΉ…νŽ˜μ΄μŠ€ FastAPI μ„œλ²„ URL
13
- HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
14
-
15
- def test_generate_text():
16
- """ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ (인증 μΆ”κ°€)"""
17
- print("\nπŸ” ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ...")
 
 
 
 
 
18
 
19
- if not HF_TOKEN:
20
- print("❌ HUGGING_FACE_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
21
- return False
 
22
 
23
- try:
24
- # 2. 인증 토큰을 담을 헀더(headers)λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
25
- headers = {
26
- "Authorization": f"Bearer {HF_TOKEN}"
27
- }
28
-
29
- data = {
30
- 'prompt': 'μ•ˆλ…•ν•˜μ„Έμš”! Private μŠ€νŽ˜μ΄μŠ€μ—μ„œ 잘 μ§€λ‚΄μ‹œλ‚˜μš”?',
31
- 'max_length': 20
32
  }
33
 
34
- print(f"πŸ“€ μš”μ²­ 데이터 (Form): {json.dumps(data, ensure_ascii=False)}")
35
-
36
- # 3. requests.post 호좜 μ‹œ headers νŒŒλΌλ―Έν„°λ₯Ό μΆ”κ°€ν•©λ‹ˆλ‹€.
37
- response = requests.post(
38
- f"{HF_API_BASE}/generate",
39
- headers=headers, # <<-- 인증 헀더 μΆ”κ°€!
40
- data=data,
41
- timeout=2000
42
- )
43
-
44
- print(f"βœ… μƒνƒœ μ½”λ“œ: {response.status_code}") # 이제 200이 ν‘œμ‹œλ  κ²ƒμž…λ‹ˆλ‹€.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  if response.status_code == 200:
47
  result = response.json()
48
- print(f"βœ… 응닡: {json.dumps(result, indent=2, ensure_ascii=False)}")
49
- else:
50
- print(f"❌ 응닡: {response.text}")
51
-
52
- return response.status_code == 200
53
-
54
  except Exception as e:
55
- print(f"❌ ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {e}")
56
- return False
57
 
58
- # 슀크립트 μ‹€ν–‰
59
  if __name__ == "__main__":
60
- test_generate_text()
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ κ°„λ‹¨ν•œ API ν…ŒμŠ€νŠΈ 슀크립트 (μ΅œμ’… μˆ˜μ •λ³Έ)
4
+ """
5
+
6
  import requests
7
  import json
 
 
 
 
 
 
 
 
8
 
9
+ def test_api():
10
+ """API ν…ŒμŠ€νŠΈ"""
11
+ url = "http://localhost:8001/generate"
12
+
13
+ test_prompts = [
14
+ "μ•ˆλ…•ν•˜μ„Έμš”!",
15
+ # "였늘 기뢄이 μ–΄λ•Œμš”?",
16
+ # "κ°„λ‹¨ν•œ μžκΈ°μ†Œκ°œλ₯Ό ν•΄μ£Όμ„Έμš”",
17
+ # "ν”„λ‘œκ·Έλž˜λ°μ΄λž€ λ¬΄μ—‡μΈκ°€μš”?",
18
+ # "날씨가 μ’‹λ„€μš”"
19
+ ]
20
 
21
+ for i, prompt in enumerate(test_prompts, 1):
22
+ print(f"\n{'='*50}")
23
+ print(f"ν…ŒμŠ€νŠΈ {i}: {prompt}")
24
+ print(f"{'='*50}")
25
 
26
+ # API μš”μ²­ - Form 데이터 ν˜•μ‹μœΌλ‘œ 전솑
27
+ payload = {
28
+ "prompt": prompt,
29
+ "max_length": 20, # 더 짧게
30
+ "temperature": 0.8, # 더 λ†’κ²Œ
31
+ "top_p": 0.95, # 더 λ†’κ²Œ
32
+ "do_sample": True
 
 
33
  }
34
 
35
+ try:
36
+ # json= 인자λ₯Ό data= 둜 λ³€κ²½ν•˜μ—¬ Form λ°μ΄ν„°λ‘œ 전솑
37
+ response = requests.post(url, data=payload, timeout=600) # ν…μŠ€νŠΈ 생성 μ‹œκ°„μ„ κ³ λ €ν•΄ νƒ€μž„μ•„μ›ƒ 증가
38
+
39
+ if response.status_code == 200:
40
+ result = response.json()
41
+ print(f"βœ… 성곡!")
42
+ print(f"πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '{result['generated_text']}'")
43
+ print(f"⏱️ 처리 μ‹œκ°„: {result['processing_time']:.2f}초")
44
+ print(f"πŸ€– λͺ¨λΈ: {result['model_name']}")
45
+ else:
46
+ print(f"❌ 였λ₯˜: {response.status_code}")
47
+ print(f"πŸ“„ 응닡: {response.text}")
48
+
49
+ except Exception as e:
50
+ print(f"❌ μš”μ²­ μ‹€νŒ¨: {e}")
51
+
52
+ def test_raw_response():
53
+ """μ›μ‹œ 응닡 확인"""
54
+ url = "http://localhost:8001/generate"
55
+
56
+ payload = {
57
+ "prompt": "Hello",
58
+ "max_length": 20,
59
+ "temperature": 1.0,
60
+ "top_p": 1.0,
61
+ "do_sample": True
62
+ }
63
+
64
+ try:
65
+ response = requests.post(url, json=payload, timeout=30)
66
+ print(f"\nπŸ” μ›μ‹œ 응닡 확인:")
67
+ print(f"μƒνƒœ μ½”λ“œ: {response.status_code}")
68
+ print(f"응닡 헀더: {dict(response.headers)}")
69
+ print(f"응닡 λ‚΄μš©: {response.text}")
70
 
71
  if response.status_code == 200:
72
  result = response.json()
73
+ print(f"νŒŒμ‹±λœ JSON: {json.dumps(result, indent=2, ensure_ascii=False)}")
74
+
 
 
 
 
75
  except Exception as e:
76
+ print(f"❌ μ›μ‹œ 응닡 확인 μ‹€νŒ¨: {e}")
 
77
 
 
78
  if __name__ == "__main__":
79
+ print("πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘")
80
+ test_api()
81
+ # test_raw_response()
82
+ print("\nβœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!")
test_hf_with_token.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os # os λͺ¨λ“ˆ μΆ”κ°€
4
+ from dotenv import load_dotenv
5
+ load_dotenv()
6
+
7
+ # 1. ν™˜κ²½ λ³€μˆ˜μ—μ„œ ν—ˆκΉ…νŽ˜μ΄μŠ€ 토큰을 κ°€μ Έμ˜΅λ‹ˆλ‹€.
8
+ # ν„°λ―Έλ„μ—μ„œ `set HUGGING_FACE_TOKEN=hf_...` (Windows) λ˜λŠ”
9
+ # `export HUGGING_FACE_TOKEN=hf_...` (Mac/Linux) λͺ…λ ΉμœΌλ‘œ 미리 μ„€μ •ν•©λ‹ˆλ‹€.
10
+ HF_TOKEN = os.getenv("HF_TOKEN")
11
+
12
+ # ν—ˆκΉ…νŽ˜μ΄μŠ€ FastAPI μ„œλ²„ URL
13
+ HF_API_BASE = "https://gbrabbit-lily-fast-api.hf.space"
14
+
15
+ def test_generate_text():
16
+ """ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ (인증 μΆ”κ°€)"""
17
+ print("\nπŸ” ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ...")
18
+
19
+ if not HF_TOKEN:
20
+ print("❌ HUGGING_FACE_TOKEN ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
21
+ return False
22
+
23
+ try:
24
+ # 2. 인증 토큰을 담을 헀더(headers)λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
25
+ headers = {
26
+ "Authorization": f"Bearer {HF_TOKEN}"
27
+ }
28
+
29
+ data = {
30
+ 'prompt': 'μ•ˆλ…•ν•˜μ„Έμš”! Private μŠ€νŽ˜μ΄μŠ€μ—μ„œ 잘 μ§€λ‚΄μ‹œλ‚˜μš”?',
31
+ 'max_length': 20
32
+ }
33
+
34
+ print(f"πŸ“€ μš”μ²­ 데이터 (Form): {json.dumps(data, ensure_ascii=False)}")
35
+
36
+ # 3. requests.post 호좜 μ‹œ headers νŒŒλΌλ―Έν„°λ₯Ό μΆ”κ°€ν•©λ‹ˆλ‹€.
37
+ response = requests.post(
38
+ f"{HF_API_BASE}/generate",
39
+ headers=headers, # <<-- 인증 헀더 μΆ”κ°€!
40
+ data=data,
41
+ timeout=2000
42
+ )
43
+
44
+ print(f"βœ… μƒνƒœ μ½”λ“œ: {response.status_code}") # 이제 200이 ν‘œμ‹œλ  κ²ƒμž…λ‹ˆλ‹€.
45
+
46
+ if response.status_code == 200:
47
+ result = response.json()
48
+ print(f"βœ… 응닡: {json.dumps(result, indent=2, ensure_ascii=False)}")
49
+ else:
50
+ print(f"❌ 응닡: {response.text}")
51
+
52
+ return response.status_code == 200
53
+
54
+ except Exception as e:
55
+ print(f"❌ ν…μŠ€νŠΈ 생성 ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {e}")
56
+ return False
57
+
58
+ # 슀크립트 μ‹€ν–‰
59
+ if __name__ == "__main__":
60
+ test_generate_text()
test_log.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
3
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
4
+
5
+ ==================================================
6
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
7
+ ==================================================
8
+ βœ… 성곡!
9
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today?'
10
+ ⏱️ 처리 μ‹œκ°„: 154.13초
11
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
12
+
13
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
14
+
15
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
16
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
17
+
18
+ ==================================================
19
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
20
+ ==================================================
21
+ βœ… 성곡!
22
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today? We're here to help with any questions or tasks you'
23
+ ⏱️ 처리 μ‹œκ°„: 217.69초
24
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
25
+
26
+
27
+
28
+
29
+
30
+ -----
31
+
32
+
33
+
34
+
35
+ INFO: 127.0.0.1:62794 - "POST /generate HTTP/1.1" 500 Internal Server Error
36
+ ERROR: Exception in ASGI application
37
+ Traceback (most recent call last):
38
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\uvicorn\protocols\http\httptools_impl.py", line 409, in run_asgi
39
+ result = await app( # type: ignore[func-returns-value]
40
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
41
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
42
+ return await self.app(scope, receive, send)
43
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
44
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
45
+ await super().__call__(scope, receive, send)
46
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\applications.py", line 113, in __call__
47
+ await self.middleware_stack(scope, receive, send)
48
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\errors.py", line 186, in __call__
49
+ raise exc
50
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\errors.py", line 164, in __call__
51
+ await self.app(scope, receive, _send)
52
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\cors.py", line 85, in __call__
53
+ await self.app(scope, receive, send)
54
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\middleware\exceptions.py", line 63, in __call__
55
+ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
56
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
57
+ raise exc
58
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app
59
+ await app(scope, receive, sender)
60
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 716, in __call__
61
+ await self.middleware_stack(scope, receive, send)
62
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 736, in app
63
+ await route.handle(scope, receive, send)
64
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 290, in handle
65
+ await self.app(scope, receive, send)
66
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 78, in app
67
+ await wrap_app_handling_exceptions(app, request)(scope, receive, send)
68
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 53, in wrapped_app
69
+ raise exc
70
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\_exception_handler.py", line 42, in wrapped_app
71
+ await app(scope, receive, sender)
72
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\starlette\routing.py", line 75, in app
73
+ response = await f(request)
74
+ ^^^^^^^^^^^^^^^^
75
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\routing.py", line 302, in app
76
+ raw_response = await run_endpoint_function(
77
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
78
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\fastapi\routing.py", line 213, in run_endpoint_function
79
+ return await dependant.call(**values)
80
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
81
+ File "C:\Project\lily_generate_project\lily_generate_package\lily_llm_api\app_v2.py", line 372, in generate
82
+ result = await loop.run_in_executor(
83
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^
84
+ File "C:\Users\gigab\AppData\Local\Programs\Python\Python311\Lib\concurrent\futures\thread.py", line 58, in run
85
+ result = self.fn(*self.args, **self.kwargs)
86
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
87
+ File "C:\Project\lily_generate_project\lily_generate_package\lily_llm_api\app_v2.py", line 303, in generate_sync
88
+ inputs = tokenizer.encode_prompt(prompt=formatted_prompt, image_meta=combined_image_metas)
89
+ ^^^^^^^^^^^^^^^^^^^^^^^
90
+ File "c:\Project\lily_generate_project\lily_generate_package\lily_llm_env\Lib\site-packages\transformers\tokenization_utils_base.py", line 1099, in __getattr__
91
+ raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
92
+ AttributeError: PreTrainedTokenizerFast has no attribute encode_prompt
93
+
94
+
95
+
96
+
97
+ ---
98
+
99
+
100
+
101
+
102
+
103
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
104
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
105
+
106
+ ==================================================
107
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
108
+ ==================================================
109
+ βœ… 성곡!
110
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: 'Hello! How can I assist you today? We're here to help with any questions or tasks you'
111
+ ⏱️ 처리 μ‹œκ°„: 217.69초
112
+ πŸ€– λͺ¨λΈ: kanana-1.5-v-3b-instruct
113
+
114
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
115
+
116
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
117
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
118
+
119
+ ==================================================
120
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
121
+ ==================================================
122
+ βœ… 성곡!
123
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '"μ•ˆλ…•ν•˜μ„Έμš”!"
124
+
125
+ 인사: μ•ˆλ…•ν•˜μ‹­λ‹ˆκΉŒ?
126
+
127
+ 질문: "제'
128
+ ⏱️ 처리 μ‹œκ°„: 20.50초
129
+ πŸ€– λͺ¨λΈ: Polyglot-ko-1.3b-chat
130
+
131
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
132
+
133
+ ---
134
+
135
+
136
+
137
+ (lily_llm_env) C:\Project\lily_generate_project\lily_generate_package>python test.py
138
+ πŸ§ͺ Lily LLM API ν…ŒμŠ€νŠΈ μ‹œμž‘
139
+
140
+ ==================================================
141
+ ν…ŒμŠ€νŠΈ 1: μ•ˆλ…•ν•˜μ„Έμš”!
142
+ ==================================================
143
+ βœ… 성곡!
144
+ πŸ“ μƒμ„±λœ ν…μŠ€νŠΈ: '&&...
145
+
146
+ μ•ˆλ…•ν•˜μ„Έμš”, μ €λŠ” Alisterμž…λ‹ˆλ‹€'
147
+ ⏱️ 처리 μ‹œκ°„: 17.73초
148
+ πŸ€– λͺ¨λΈ: Polyglot-ko-1.3b-chat
149
+
150
+ βœ… ν…ŒμŠ€νŠΈ μ™„λ£Œ!
151
+
152
+
153
+ --
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+