gbrabbit committed on
Commit
1d1372e
·
1 Parent(s): e938fb9

Auto commit at 24-2025-08 18:22:22

Browse files
lily_llm_api/api/routers/generation_router.py CHANGED
@@ -25,7 +25,11 @@ async def generate(request: Request,
25
  user_id: str = Form("anonymous"),
26
  room_id: str = Form("default"),
27
  use_context: bool = Form(True),
28
- session_id: str = Form(None)):
 
 
 
 
29
 
30
  if not is_model_loaded():
31
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
@@ -74,7 +78,7 @@ async def generate(request: Request,
74
 
75
  try:
76
  # generate_sync ํ•จ์ˆ˜ ํ˜ธ์ถœ (์ปจํ…์ŠคํŠธ ํฌํ•จ)
77
- result = generate_sync(prompt, image_data_list, use_context=use_context, session_id=session_id, user_id=user_id, room_id=room_id)
78
 
79
  if "error" in result:
80
  raise HTTPException(status_code=500, detail=result["error"])
 
25
  user_id: str = Form("anonymous"),
26
  room_id: str = Form("default"),
27
  use_context: bool = Form(True),
28
+ session_id: str = Form(None),
29
+ use_rag_images: bool = Form(False),
30
+ use_rag_text: bool = Form(False),
31
+ document_id: str = Form(None),
32
+ image_short_side: int = Form(None)):
33
 
34
  if not is_model_loaded():
35
  raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
 
78
 
79
  try:
80
  # generate_sync ํ•จ์ˆ˜ ํ˜ธ์ถœ (์ปจํ…์ŠคํŠธ ํฌํ•จ)
81
+ result = generate_sync(prompt, image_data_list, use_context=use_context, session_id=session_id, user_id=user_id, room_id=room_id, use_rag_images=use_rag_images, use_rag_text=use_rag_text, document_id=document_id, image_short_side=image_short_side)
82
 
83
  if "error" in result:
84
  raise HTTPException(status_code=500, detail=result["error"])
lily_llm_api/services/generation_service.py CHANGED
@@ -2,6 +2,7 @@
2
  Generation service for Lily LLM API
3
  """
4
  import logging
 
5
  import time
6
  from typing import Optional, List, Dict
7
  from pathlib import Path
@@ -16,6 +17,11 @@ logger = logging.getLogger(__name__)
16
  # ์ฃผ์˜: ํ”„๋กœ์„ธ์Šค ์žฌ์‹œ์ž‘ ์‹œ ์ดˆ๊ธฐํ™”๋จ. ์ตœ๋Œ€ 4์žฅ ๋ณด๊ด€.
17
  _session_image_cache: Dict[str, List[bytes]] = {}
18
 
 
 
 
 
 
19
  # ์„ ํƒ์ : ๋ฒกํ„ฐ ์Šคํ† ์–ด์—์„œ ์ตœ๊ทผ ๋ฌธ์„œ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์ง€์›
20
  try:
21
  from lily_llm_core.vector_store_manager import vector_store_manager, SimpleVectorStore
@@ -26,7 +32,9 @@ except Exception:
26
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
27
  temperature: Optional[float] = None, top_p: Optional[float] = None,
28
  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
29
- user_id: str = "anonymous", room_id: str = "default") -> dict:
 
 
30
  """[์ตœ์ ํ™”] ๋ชจ๋ธ ์ƒ์„ฑ์„ ์ฒ˜๋ฆฌํ•˜๋Š” ํ†ตํ•ฉ ๋™๊ธฐ ํ•จ์ˆ˜"""
31
  try:
32
  from .model_service import get_current_profile, get_current_model
@@ -92,8 +100,8 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
92
  all_image_data.extend(cached_imgs)
93
  print(f"๐Ÿ” [DEBUG] ์„ธ์…˜ ์บ์‹œ์—์„œ ์ด์ „ ์ด๋ฏธ์ง€ {len(cached_imgs)}๊ฐœ ๋ณต๊ตฌ (์„ธ์…˜: {session_id})")
94
 
95
- # ์ถ”๊ฐ€ ๋ณต๊ตฌ: ์—ฌ์ „ํžˆ ์ด๋ฏธ์ง€๊ฐ€ ์—†๊ณ  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ์ด๋ฉด, ์ตœ๊ทผ RAG ๋ฌธ์„œ์—์„œ ์ด๋ฏธ์ง€ ๋ฐ”์ดํŠธ ๋ณต์›
96
- if (not all_image_data or len([img for img in all_image_data if img]) == 0) and getattr(current_profile, 'multimodal', False):
97
  try:
98
  if vector_store_manager is not None:
99
  # ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ (์ตœ์‹ ์ˆœ ์ •๋ ฌ)
@@ -129,9 +137,12 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
129
  print("โš ๏ธ [DEBUG] vector_store_manager ๋ฏธ์‚ฌ์šฉ - ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ๋น„ํ™œ์„ฑํ™”")
130
  except Exception as e:
131
  print(f"โš ๏ธ [DEBUG] RAG ๊ธฐ๋ฐ˜ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์‹คํŒจ: {e}")
 
 
132
 
133
  # ํ•ญ์ƒ ์ฐธ์กฐ ๊ฐ€๋Šฅํ•œ max_images ์ •์˜ (์ด๋ฏธ์ง€ ์—†์œผ๋ฉด 0)
134
- max_images = min(len([img for img in all_image_data if img]) if all_image_data else 0, 4)
 
135
 
136
  if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
137
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹œ์ž‘ - ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len([img for img in all_image_data if img])}")
@@ -146,6 +157,22 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
146
  if image_bytes:
147
  try:
148
  pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
150
  if processor and hasattr(processor, 'image_processor'):
151
  processed = processor.image_processor(pil_image)
@@ -169,6 +196,22 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
169
  if image_bytes:
170
  try:
171
  pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
173
  if processor and hasattr(processor, 'image_processor'):
174
  # KananaVImageProcessor๋Š” ๊ธฐ๋ณธ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ์ง€์›
@@ -189,10 +232,71 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
189
  logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
190
  combined_image_metas = {}
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # --- 2. ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ---
193
  print(f"๐Ÿ” [DEBUG] ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ์‹œ์ž‘")
194
 
195
- # ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ (๋Œ€ํ™” ๊ธฐ๋ก + RAG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํฌํ•จ) - ๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”
196
  context_prompt = ""
197
  if use_context and session_id:
198
  try:
@@ -209,6 +313,34 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
209
  except Exception as e:
210
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e}")
211
  context_prompt = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  # ์ด๋ฏธ์ง€ ๋ฐ”์ดํŠธ๋ฅผ ์„ธ์…˜ ์บ์‹œ์— ๋ณด๊ด€ (๋‹ค์Œ ํ„ด์— ์žฌ์‚ฌ์šฉ)
213
  if session_id:
214
  # ์›๋ณธ ์š”์ฒญ์— ์ด๋ฏธ์ง€๊ฐ€ ์žˆ์—ˆ๋‹ค๋ฉด ๊ทธ๊ฑธ ์šฐ์„  ๋ณด๊ด€, ์—†์œผ๋ฉด ๋ณต๊ตฌ๋œ ์ด๋ฏธ์ง€ ์œ ์ง€
@@ -226,6 +358,16 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
226
 
227
  # formatted_prompt ์ดˆ๊ธฐํ™”
228
  formatted_prompt = None
 
 
 
 
 
 
 
 
 
 
229
 
230
  # ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ๋ฐฉ์‹)
231
  if all_pixel_values and len(all_pixel_values) > 0:
@@ -233,6 +375,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
233
  num_images = len(all_pixel_values)
234
  image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜๋งŒํผ <image> ํ† ํฐ ์ƒ์„ฑ
235
  # ๋‹ต๋ณ€ ์œ ๋„๋ฅผ ์œ„ํ•ด Assistant ํ”„๋ฆฌํ”ฝ์Šค ์ถ”๊ฐ€
 
236
  formatted_prompt = f"Human: {image_tokens}{prompt}\nAssistant:"
237
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ํ˜•์‹): {formatted_prompt}")
238
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ† ํฐ ์ƒ์„ฑ: {num_images}๊ฐœ ์ด๋ฏธ์ง€ -> {image_tokens}")
@@ -400,20 +543,68 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
400
  # ๐Ÿ”„ ์ตœ์ข… ๊ฒ€์ฆ
401
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids ํƒ€์ž…: {type(input_ids)}, shape: {input_ids.shape}")
402
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… attention_mask ํƒ€์ž…: {type(attention_mask)}, shape: {attention_mask.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  except Exception as e:
404
- print(f"โŒ [DEBUG] encode_prompt ์‹คํŒจ: {e}, ํด๋ฐฑ ์‚ฌ์šฉ")
405
- # ํด๋ฐฑ: ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ
406
- inputs = tokenizer(
407
- formatted_prompt,
408
- return_tensors="pt",
409
- padding=True,
410
- truncation=True,
411
- max_length=2048,
412
- )
413
- if 'token_type_ids' in inputs:
414
- del inputs['token_type_ids']
415
- input_ids = inputs['input_ids']
416
- attention_mask = inputs['attention_mask']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  else:
418
  # ์•ˆ์ „ ํด๋ฐฑ
419
  print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ (ํด๋ฐฑ)")
@@ -586,25 +777,15 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
586
  else:
587
  processed_image_metas[key] = value
588
 
589
- # ๐Ÿ”„ ์ฐธ๊ณ  ๋กœ๊ทธ๋งŒ ์ถœ๋ ฅ: ์ด๋ฏธ์ง€ ํ† ํฐ ์ˆ˜ ์ถ”์ • (์กฐ์ •์€ ํ•˜์ง€ ์•Š์Œ)
590
- if 'image_token_thw' in processed_image_metas:
591
- image_token_thw = processed_image_metas['image_token_thw']
592
- if isinstance(image_token_thw, torch.Tensor):
593
- total_image_tokens = 0
594
- print(f"๐Ÿ” [DEBUG] image_token_thw shape: {image_token_thw.shape}")
595
- print(f"๐Ÿ” [DEBUG] image_token_thw ๋‚ด์šฉ: {image_token_thw}")
596
- for i in range(image_token_thw.shape[0]):
597
- token_info = image_token_thw[i]
598
- if len(token_info) == 3:
599
- t, h, w = token_info
600
- total_image_tokens += t * h * w
601
- elif len(token_info) == 2:
602
- h, w = token_info
603
- total_image_tokens += h * w
604
- print(f"๐Ÿ” [DEBUG] ๊ณ„์‚ฐ๋œ ์ด ์ด๋ฏธ์ง€ ํ† ํฐ ์ˆ˜(์ฐธ๊ณ ): {total_image_tokens}")
605
- if isinstance(total_image_tokens, torch.Tensor):
606
- total_image_tokens = total_image_tokens.sum().item()
607
- print(f"๐Ÿ” [DEBUG] pixel_values ๊ธธ์ด: {pixel_values.shape[0]}, ์˜ˆ์ƒ: {total_image_tokens} (์กฐ์ • ์•ˆํ•จ)")
608
 
609
  # ์•ˆ์ „ ๊ฐ€๋“œ: vision_grid_thw๊ฐ€ [1, N, 3]๋กœ ์˜ค๋ฉด [N, 3]๋กœ ๋ณ€ํ™˜
610
  try:
 
2
  Generation service for Lily LLM API
3
  """
4
  import logging
5
+ import os
6
  import time
7
  from typing import Optional, List, Dict
8
  from pathlib import Path
 
17
  # ์ฃผ์˜: ํ”„๋กœ์„ธ์Šค ์žฌ์‹œ์ž‘ ์‹œ ์ดˆ๊ธฐํ™”๋จ. ์ตœ๋Œ€ 4์žฅ ๋ณด๊ด€.
18
  _session_image_cache: Dict[str, List[bytes]] = {}
19
 
20
+ # ์ˆ˜๋™ ๋ฆฌ์‚ฌ์ด์ฆˆ ๊ฐ•์ œ ์„ค์ • (์ฝ”๋“œ์—์„œ ์ง์ ‘ ์ˆ˜์ •์šฉ). ์˜ˆ: 128, 256, 512 ... None์ด๋ฉด ๋น„ํ™œ์„ฑํ™”
21
+ DEFAULT_IMAGE_SHORT_SIDE: Optional[int] = 128
22
+
23
+ max_images_limit = 4
24
+
25
  # ์„ ํƒ์ : ๋ฒกํ„ฐ ์Šคํ† ์–ด์—์„œ ์ตœ๊ทผ ๋ฌธ์„œ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์ง€์›
26
  try:
27
  from lily_llm_core.vector_store_manager import vector_store_manager, SimpleVectorStore
 
32
  def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
33
  temperature: Optional[float] = None, top_p: Optional[float] = None,
34
  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
35
+ user_id: str = "anonymous", room_id: str = "default", use_rag_images: bool = False,
36
+ use_rag_text: bool = False, document_id: Optional[str] = None,
37
+ image_short_side: Optional[int] = None) -> dict:
38
  """[์ตœ์ ํ™”] ๋ชจ๋ธ ์ƒ์„ฑ์„ ์ฒ˜๋ฆฌํ•˜๋Š” ํ†ตํ•ฉ ๋™๊ธฐ ํ•จ์ˆ˜"""
39
  try:
40
  from .model_service import get_current_profile, get_current_model
 
100
  all_image_data.extend(cached_imgs)
101
  print(f"๐Ÿ” [DEBUG] ์„ธ์…˜ ์บ์‹œ์—์„œ ์ด์ „ ์ด๋ฏธ์ง€ {len(cached_imgs)}๊ฐœ ๋ณต๊ตฌ (์„ธ์…˜: {session_id})")
102
 
103
+ # ์ถ”๊ฐ€ ๋ณต๊ตฌ: ์—ฌ์ „ํžˆ ์ด๋ฏธ์ง€๊ฐ€ ์—†๊ณ  ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ์ด๋ฉฐ, ๋ช…์‹œ์ ์œผ๋กœ ํ—ˆ์šฉ๋œ ๊ฒฝ์šฐ์—๋งŒ RAG์—์„œ ์ด๋ฏธ์ง€ ๋ณต์›
104
+ if use_rag_images and (not all_image_data or len([img for img in all_image_data if img]) == 0) and getattr(current_profile, 'multimodal', False):
105
  try:
106
  if vector_store_manager is not None:
107
  # ์‚ฌ์šฉ์ž ๋ฌธ์„œ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ (์ตœ์‹ ์ˆœ ์ •๋ ฌ)
 
137
  print("โš ๏ธ [DEBUG] vector_store_manager ๋ฏธ์‚ฌ์šฉ - ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ๋น„ํ™œ์„ฑํ™”")
138
  except Exception as e:
139
  print(f"โš ๏ธ [DEBUG] RAG ๊ธฐ๋ฐ˜ ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ์‹คํŒจ: {e}")
140
+ elif not use_rag_images and getattr(current_profile, 'multimodal', False):
141
+ print("๐Ÿ” [DEBUG] RAG ์ด๋ฏธ์ง€ ๋ณต๊ตฌ ๋น„ํ™œ์„ฑํ™”๋จ(use_rag_images=False) - ํ…์ŠคํŠธ ์ „์šฉ ์œ ์ง€")
142
 
143
  # ํ•ญ์ƒ ์ฐธ์กฐ ๊ฐ€๋Šฅํ•œ max_images ์ •์˜ (์ด๋ฏธ์ง€ ์—†์œผ๋ฉด 0)
144
+ # 1์ฐจ ์ƒํ•œ์€ 4์žฅ์œผ๋กœ ์ œํ•œ (์ตœ์ข… ์„ ํƒ์€ ์˜ˆ์‚ฐ ๊ธฐ๋ฐ˜ ๋™์  ์„ ํƒ์—์„œ ๊ฒฐ์ •)
145
+ max_images = min(len([img for img in all_image_data if img]) if all_image_data else 0, max_images_limit)
146
 
147
  if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
148
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹œ์ž‘ - ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len([img for img in all_image_data if img])}")
 
157
  if image_bytes:
158
  try:
159
  pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
160
+ # ์ˆ˜๋™ ๋ฆฌ์‚ฌ์ด์ฆˆ ์šฐ์„ ์ˆœ์œ„: API ํŒŒ๋ผ๋ฏธํ„ฐ > ์ฝ”๋“œ ์ƒ์ˆ˜ > ํ™˜๊ฒฝ๋ณ€์ˆ˜
161
+ env_short_side = os.getenv('LILY_IMAGE_SHORT_SIDE')
162
+ try:
163
+ env_short_side_val = int(env_short_side) if env_short_side is not None else None
164
+ except Exception:
165
+ env_short_side_val = None
166
+ effective_short_side = image_short_side or DEFAULT_IMAGE_SHORT_SIDE or env_short_side_val
167
+ if effective_short_side:
168
+ s = max(128, min(int(effective_short_side), 2048))
169
+ w, h = pil_image.size
170
+ if w > 0 and h > 0:
171
+ if w <= h:
172
+ new_w = s; new_h = int(h * (s / w))
173
+ else:
174
+ new_h = s; new_w = int(w * (s / h))
175
+ pil_image = pil_image.resize((max(1, new_w), max(1, new_h)))
176
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
177
  if processor and hasattr(processor, 'image_processor'):
178
  processed = processor.image_processor(pil_image)
 
196
  if image_bytes:
197
  try:
198
  pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
199
+ # ์ˆ˜๋™ ๋ฆฌ์‚ฌ์ด์ฆˆ ์šฐ์„ ์ˆœ์œ„: API ํŒŒ๋ผ๋ฏธํ„ฐ > ์ฝ”๋“œ ์ƒ์ˆ˜ > ํ™˜๊ฒฝ๋ณ€์ˆ˜
200
+ env_short_side = os.getenv('LILY_IMAGE_SHORT_SIDE')
201
+ try:
202
+ env_short_side_val = int(env_short_side) if env_short_side is not None else None
203
+ except Exception:
204
+ env_short_side_val = None
205
+ effective_short_side = image_short_side or DEFAULT_IMAGE_SHORT_SIDE or env_short_side_val
206
+ if effective_short_side:
207
+ s = max(128, min(int(effective_short_side), 2048))
208
+ w, h = pil_image.size
209
+ if w > 0 and h > 0:
210
+ if w <= h:
211
+ new_w = s; new_h = int(h * (s / w))
212
+ else:
213
+ new_h = s; new_w = int(w * (s / h))
214
+ pil_image = pil_image.resize((max(1, new_w), max(1, new_h)))
215
  # ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
216
  if processor and hasattr(processor, 'image_processor'):
217
  # KananaVImageProcessor๋Š” ๊ธฐ๋ณธ ํŒŒ๋ผ๋ฏธํ„ฐ๋งŒ ์ง€์›
 
232
  logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
233
  combined_image_metas = {}
234
 
235
+ # ๐Ÿ”ง ์ด๋ฏธ์ง€ ํ† ํฐ ์˜ˆ์‚ฐ ๊ธฐ๋ฐ˜ ๋™์  ์„ ํƒ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ธธ์ด ์ดˆ๊ณผ ๋ฐฉ์ง€)
236
+ try:
237
+ # 1) ์ด๋ฏธ์ง€๋ณ„ ํ† ํฐ ์ˆ˜ ์‚ฐ์ถœ
238
+ per_image_tokens: List[int] = []
239
+ if isinstance(combined_image_metas, dict) and 'image_token_thw' in combined_image_metas:
240
+ for thw in combined_image_metas['image_token_thw']:
241
+ if isinstance(thw, (list, tuple)) and len(thw) == 3:
242
+ per_image_tokens.append(int(thw[0]) * int(thw[1]) * int(thw[2]))
243
+ else:
244
+ per_image_tokens.append(0)
245
+ else:
246
+ # ๋ฉ”ํƒ€๊ฐ€ ์—†์œผ๋ฉด ๋ณด์ˆ˜์ ์œผ๋กœ ํฐ ๊ฐ’์œผ๋กœ ๊ฐ„์ฃผํ•˜์—ฌ ํ…์ŠคํŠธ-only๋กœ ์œ ๋„
247
+ per_image_tokens = [3000] * len(all_pixel_values)
248
+
249
+ # 2) ํ…์ŠคํŠธ ๊ธธ์ด ์ธก์ • (์ด๋ฏธ์ง€ ํ† ํฐ ์ œ์™ธํ•œ ํ”„๋กฌํ”„ํŠธ)
250
+ base_text_prompt = f"Human: {prompt}\nAssistant:"
251
+ text_inputs = tokenizer(
252
+ base_text_prompt,
253
+ return_tensors="pt",
254
+ padding=False,
255
+ truncation=True,
256
+ max_length=2048,
257
+ )
258
+ text_len = int(text_inputs['input_ids'].shape[-1]) if 'input_ids' in text_inputs else 0
259
+
260
+ # 3) ์ด๋ฏธ์ง€ ํ† ํฐ ์˜ˆ์‚ฐ ๊ณ„์‚ฐ (์—ฌ์œ  ๋งˆ์ง„ 16)
261
+ total_budget = 2048
262
+ margin = 16
263
+ allowed_image_tokens = max(0, total_budget - text_len - margin)
264
+ print(f"๐Ÿ” [DEBUG] ํ† ํฐ ์˜ˆ์‚ฐ: text={text_len}, allowed_image={allowed_image_tokens}")
265
+
266
+ # 4) ์˜ˆ์‚ฐ ๋‚ด ์ตœ๋Œ€ ์ด๋ฏธ์ง€ ์ˆ˜ ์„ ํƒ (์•ž์—์„œ๋ถ€ํ„ฐ ๊ทธ๋ฆฌ๋””)
267
+ selected_indices: List[int] = []
268
+ cum = 0
269
+ for i, tok in enumerate(per_image_tokens):
270
+ if cum + tok <= allowed_image_tokens:
271
+ selected_indices.append(i)
272
+ cum += tok
273
+ else:
274
+ break
275
+
276
+ # 5) ์ ์šฉ: ์„ ํƒ๋œ ์ด๋ฏธ์ง€๋งŒ ์œ ์ง€
277
+ if selected_indices and len(selected_indices) < len(all_pixel_values):
278
+ print(f"๐Ÿ”ง [DEBUG] ์ด๋ฏธ์ง€ ์„ ํƒ: {len(all_pixel_values)} -> {len(selected_indices)} (cum_tokens={cum})")
279
+ all_pixel_values = [all_pixel_values[i] for i in selected_indices]
280
+ if isinstance(combined_image_metas, dict):
281
+ for key in list(combined_image_metas.keys()):
282
+ try:
283
+ combined_image_metas[key] = [combined_image_metas[key][i] for i in selected_indices]
284
+ except Exception:
285
+ pass
286
+ # ์ตœ์ข… max_images ๊ฐฑ์‹ 
287
+ max_images = len(all_pixel_values)
288
+ elif not selected_indices:
289
+ print("โš ๏ธ [DEBUG] ์ด๋ฏธ์ง€ ์˜ˆ์‚ฐ ๋ถ€์กฑ โ†’ ํ…์ŠคํŠธ-only๋กœ ์ „ํ™˜")
290
+ all_pixel_values = []
291
+ combined_image_metas = {}
292
+ max_images = 0
293
+ except Exception as _e_budget:
294
+ print(f"โš ๏ธ [DEBUG] ์ด๋ฏธ์ง€ ์˜ˆ์‚ฐ ๊ณ„์‚ฐ ์‹คํŒจ: {_e_budget}")
295
+
296
  # --- 2. ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ---
297
  print(f"๐Ÿ” [DEBUG] ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ์‹œ์ž‘")
298
 
299
+ # ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ (๋Œ€ํ™” ๊ธฐ๋ก + ์„ ํƒ์  RAG ํ…์ŠคํŠธ ๊ฒ€์ƒ‰) - ๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”
300
  context_prompt = ""
301
  if use_context and session_id:
302
  try:
 
313
  except Exception as e:
314
  print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e}")
315
  context_prompt = ""
316
+
317
+ # ์„ ํƒ์  RAG ํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ํ†ตํ•ฉ
318
+ if use_rag_text:
319
+ try:
320
+ from lily_llm_core.vector_store_manager import vector_store_manager as _vsm
321
+ # ๋Œ€์ƒ ๋ฌธ์„œ ์„ ํƒ: ๋ช…์‹œ์  document_id > ์ตœ์‹  ๋ฌธ์„œ
322
+ target_doc_id = document_id
323
+ if not target_doc_id:
324
+ user_docs = _vsm.get_user_documents(user_id)
325
+ if user_docs:
326
+ user_docs.sort(key=lambda d: d.get('last_updated') or d.get('created_at') or 0, reverse=True)
327
+ target_doc_id = user_docs[0].get('document_id')
328
+ # ๊ฒ€์ƒ‰
329
+ if target_doc_id:
330
+ docs = _vsm.search_similar(user_id, target_doc_id, prompt, k=3)
331
+ rag_contexts = []
332
+ for d in docs:
333
+ try:
334
+ preview = getattr(d, 'page_content', '')
335
+ rag_contexts.append(preview)
336
+ except Exception:
337
+ continue
338
+ if rag_contexts:
339
+ rag_text = "\n\n".join(rag_contexts)
340
+ context_prompt = (context_prompt or "") + f"[RAG]\n{rag_text}\n\n"
341
+ print(f"๐Ÿ” [DEBUG] RAG ํ…์ŠคํŠธ ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ: {len(rag_text)}์ž, ๋ฌธ์„œ={target_doc_id}")
342
+ except Exception as _re:
343
+ print(f"โš ๏ธ [DEBUG] RAG ํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์‹คํŒจ: {_re}")
344
  # ์ด๋ฏธ์ง€ ๋ฐ”์ดํŠธ๋ฅผ ์„ธ์…˜ ์บ์‹œ์— ๋ณด๊ด€ (๋‹ค์Œ ํ„ด์— ์žฌ์‚ฌ์šฉ)
345
  if session_id:
346
  # ์›๋ณธ ์š”์ฒญ์— ์ด๋ฏธ์ง€๊ฐ€ ์žˆ์—ˆ๋‹ค๋ฉด ๊ทธ๊ฑธ ์šฐ์„  ๋ณด๊ด€, ์—†์œผ๋ฉด ๋ณต๊ตฌ๋œ ์ด๋ฏธ์ง€ ์œ ์ง€
 
358
 
359
  # formatted_prompt ์ดˆ๊ธฐํ™”
360
  formatted_prompt = None
361
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ์šฉ RAG ์Šค๋‹ˆํŽซ(๊ธธ์ด ์ œํ•œ) ์ค€๋น„
362
+ rag_snippet_short = ""
363
+ if context_prompt and isinstance(context_prompt, str):
364
+ try:
365
+ # ๊ณผ๋„ํ•œ ๊ธธ์ด ๋ฐฉ์ง€: ์šฐ์„  256์ž๋กœ ์ œํ•œ
366
+ rag_snippet_short = context_prompt[:256]
367
+ if not rag_snippet_short.endswith("\n"):
368
+ rag_snippet_short += "\n"
369
+ except Exception:
370
+ rag_snippet_short = ""
371
 
372
  # ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ๋ฐฉ์‹)
373
  if all_pixel_values and len(all_pixel_values) > 0:
 
375
  num_images = len(all_pixel_values)
376
  image_tokens = "<image>" * num_images # ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜๋งŒํผ <image> ํ† ํฐ ์ƒ์„ฑ
377
  # ๋‹ต๋ณ€ ์œ ๋„๋ฅผ ์œ„ํ•ด Assistant ํ”„๋ฆฌํ”ฝ์Šค ์ถ”๊ฐ€
378
+ # ๊ธธ์ด ์ดˆ๊ณผ๋ฅผ ๋ฐฉ์ง€ํ•˜๊ธฐ ์œ„ํ•ด ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๊ฒฝ๋กœ์—์„œ๋Š” ์‚ฌ์šฉ์ž ์ž…๋ ฅ๋งŒ ํฌํ•จ
379
  formatted_prompt = f"Human: {image_tokens}{prompt}\nAssistant:"
380
  print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ํ˜•์‹): {formatted_prompt}")
381
  print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ† ํฐ ์ƒ์„ฑ: {num_images}๊ฐœ ์ด๋ฏธ์ง€ -> {image_tokens}")
 
543
  # ๐Ÿ”„ ์ตœ์ข… ๊ฒ€์ฆ
544
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids ํƒ€์ž…: {type(input_ids)}, shape: {input_ids.shape}")
545
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… attention_mask ํƒ€์ž…: {type(attention_mask)}, shape: {attention_mask.shape}")
546
+
547
+ # -1 ํ† ํฐ(์ด๋ฏธ์ง€ ์ž๋ฆฌํ‘œ์‹œ์ž) ์กด์žฌ ๊ฒ€์ฆ ๋ฐ ๋‹จ๊ณ„์  ์žฌ์‹œ๋„
548
+ try:
549
+ neg_exists = (input_ids == -1).any().item() if hasattr(input_ids, 'any') else False
550
+ except Exception:
551
+ neg_exists = False
552
+ if not neg_exists and len(all_pixel_values) > 0:
553
+ print("โš ๏ธ [DEBUG] -1 ํ† ํฐ ์—†์Œ โ†’ RAG ์Šค๋‹ˆํŽซ ๊ธธ์ด ์ค„์—ฌ ์žฌ์‹œ๋„")
554
+ for limit in [128, 64, 0]:
555
+ try:
556
+ base_snippet = (context_prompt or "")[:limit]
557
+ if base_snippet and not base_snippet.endswith("\n"):
558
+ base_snippet += "\n"
559
+ base_prompt_retry = f"Human: {'<image>' * len(all_pixel_values)}{base_snippet}{prompt}\nAssistant:"
560
+ print(f"๐Ÿ” [DEBUG] ์žฌ์‹œ๋„ limit={limit}: {base_prompt_retry}")
561
+ inputs_retry = tokenizer.encode_prompt(
562
+ prompt=base_prompt_retry,
563
+ max_length=2048,
564
+ image_meta=final_meta
565
+ )
566
+ # ์ •๊ทœํ™”
567
+ if 'seq_length' in inputs_retry:
568
+ del inputs_retry['seq_length']
569
+ _input_ids = inputs_retry['input_ids'][0] if isinstance(inputs_retry['input_ids'], tuple) else inputs_retry['input_ids']
570
+ _neg = (_input_ids == -1).any().item() if hasattr(_input_ids, 'any') else False
571
+ if _neg:
572
+ inputs = inputs_retry
573
+ input_ids = _input_ids
574
+ attention_mask = inputs_retry['attention_mask'][0] if isinstance(inputs_retry['attention_mask'], tuple) else inputs_retry['attention_mask']
575
+ formatted_prompt = base_prompt_retry
576
+ print("โœ… [DEBUG] ์žฌ์‹œ๋„ ์„ฑ๊ณต: -1 ํ† ํฐ ํ™•๋ณด")
577
+ break
578
+ except Exception as _re_try:
579
+ print(f"โš ๏ธ [DEBUG] ์žฌ์‹œ๋„ ์‹คํŒจ(limit={limit}): {_re_try}")
580
  except Exception as e:
581
+ print(f"โŒ [DEBUG] encode_prompt ์‹คํŒจ: {e}")
582
+ # 1์ฐจ ์žฌ์‹œ๋„: ์ปจํ…์ŠคํŠธ ์ œ๊ฑฐํ•˜๊ณ  ์ด๋ฏธ์ง€+์งˆ๋ฌธ๋งŒ์œผ๋กœ ์žฌ์‹œ๋„
583
+ try:
584
+ base_prompt_retry = f"Human: {'<image>' * len(all_pixel_values)}{prompt}\nAssistant:"
585
+ print(f"๐Ÿ” [DEBUG] encode_prompt ์žฌ์‹œ๋„(์ปจํ…์ŠคํŠธ ์ œ๊ฑฐ): {base_prompt_retry}")
586
+ inputs = tokenizer.encode_prompt(
587
+ prompt=base_prompt_retry,
588
+ max_length=2048,
589
+ image_meta=final_meta
590
+ )
591
+ print(f"๐Ÿ” [DEBUG] encode_prompt ์žฌ์‹œ๋„ ์„ฑ๊ณต: {list(inputs.keys())}")
592
+ except Exception as e2:
593
+ print(f"โŒ [DEBUG] encode_prompt ์žฌ์‹œ๋„ ์‹คํŒจ: {e2}. ํ…์ŠคํŠธ-only๋กœ ํด๋ฐฑ")
594
+ # ์ตœ์ข… ํด๋ฐฑ: ํ…์ŠคํŠธ-only ๊ฒฝ๋กœ๋กœ ์ „ํ™˜(์ด๋ฏธ์ง€ ๋น„ํ™œ์„ฑํ™”)
595
+ all_pixel_values = []
596
+ image_processed = False
597
+ inputs = tokenizer(
598
+ formatted_prompt if formatted_prompt else prompt,
599
+ return_tensors="pt",
600
+ padding=True,
601
+ truncation=True,
602
+ max_length=2048,
603
+ )
604
+ if 'token_type_ids' in inputs:
605
+ del inputs['token_type_ids']
606
+ input_ids = inputs['input_ids']
607
+ attention_mask = inputs['attention_mask']
608
  else:
609
  # ์•ˆ์ „ ํด๋ฐฑ
610
  print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ (ํด๋ฐฑ)")
 
777
  else:
778
  processed_image_metas[key] = value
779
 
780
+ # ๐Ÿ”’ ์•ˆ์ „ ๊ฐ€๋“œ: image_token_thw๊ฐ€ ๋น„์ •์ƒ์ผ ๋•Œ -1 ํ† ํฐ์ด ์ƒ์„ฑ๋˜์ง€ ์•Š๋„๋ก ๋ฐฉ์ง€
781
+ try:
782
+ if 'image_token_thw' in processed_image_metas:
783
+ it = processed_image_metas['image_token_thw']
784
+ if isinstance(it, torch.Tensor) and (it.numel() == 0 or it.shape[-1] != 3):
785
+ print(f"โš ๏ธ [DEBUG] image_token_thw ๋น„์ •์ƒ: {it.shape if hasattr(it,'shape') else type(it)} -> ์•ˆ์ „ ๊ธฐ๋ณธ๊ฐ’ ์ ์šฉ")
786
+ processed_image_metas['image_token_thw'] = torch.tensor([[1,1,1]] * len(all_pixel_values), dtype=torch.long).unsqueeze(0)
787
+ except Exception as _safe_e:
788
+ print(f"โš ๏ธ [DEBUG] image_token_thw ์•ˆ์ „ํ™” ์‹คํŒจ: {_safe_e}")
 
 
 
 
 
 
 
 
 
 
789
 
790
  # ์•ˆ์ „ ๊ฐ€๋“œ: vision_grid_thw๊ฐ€ [1, N, 3]๋กœ ์˜ค๋ฉด [N, 3]๋กœ ๋ณ€ํ™˜
791
  try: