Spaces:

gbrabbit
/

lily_fast_api

Sleeping

App Files Files Community

gbrabbit committed on Aug 24, 2025

Commit

dd1d3d2

1 Parent(s): cbf7778

Auto commit at 24-2025-08 9:25:04

Browse files

Files changed (7) hide show

lily_llm_api/api/routers/document_router.py +92 -28
lily_llm_api/api/routers/generation_router.py +17 -1
lily_llm_api/app.py +2 -2
lily_llm_api/models/kanana_1_5_v_3b_instruct.py +1 -1
lily_llm_api/services/generation_service.py +383 -77
lily_llm_api/services/session_registry.py +53 -0
lily_llm_core/document_processor.py +54 -65

lily_llm_api/api/routers/document_router.py CHANGED Viewed

@@ -5,11 +5,14 @@ from fastapi import APIRouter, HTTPException, UploadFile, File, Form
 from typing import Optional, List
 import logging
 import time
 from ...models.schemas import (
     DocumentUploadResponse, RAGQueryRequest, RAGQueryResponse,
     DocumentProcessResponse, MultimodalRAGResponse
 )
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -24,27 +27,36 @@ async def upload_document(
     try:
         start_time = time.time()
-        # 파일 읽기
         content = await file.read()
         filename = file.filename
-        # 문서 처리기 사용
         try:
-            from lily_llm_core.document_processor import document_processor
-            # 문서 처리
-            result = document_processor.process_document(
-                content=content,
-                filename=filename,
                 user_id=user_id,
-                room_id=room_id
             )
             if result.get("success"):
                 processing_time = time.time() - start_time
                 return DocumentUploadResponse(
                     success=True,
-                    document_id=result.get("document_id", "unknown"),
                     message="문서 업로드 및 처리 완료",
                     chunks=result.get("chunks", 0),
                     latex_count=result.get("latex_count", 0),
@@ -59,12 +71,26 @@ async def upload_document(
                 )
         except ImportError:
-            return DocumentUploadResponse(
-                success=False,
-                document_id="",
-                message="문서 처리기 import 실패",
-                error="Document processor not available"
-            )
     except Exception as e:
         logger.error(f"문서 업로드 실패: {e}")
@@ -269,23 +295,29 @@ async def batch_process_documents(
         results = []
         try:
-            from lily_llm_core.document_processor import document_processor
             for file in files:
                 content = await file.read()
                 filename = file.filename
-                result = document_processor.process_document(
-                    content=content,
-                    filename=filename,
                     user_id=user_id,
-                    room_id=room_id
                 )
                 results.append({
                     "filename": filename,
                     "success": result.get("success", False),
-                    "document_id": result.get("document_id", ""),
                     "chunks": result.get("chunks", 0),
                     "error": result.get("error")
                 })
@@ -299,7 +331,35 @@ async def batch_process_documents(
             }
         except ImportError:
-            raise HTTPException(status_code=500, detail="Document processor not available")
     except Exception as e:
         logger.error(f"일괄 문서 처리 실패: {e}")
@@ -350,11 +410,15 @@ async def upload_multimodal_document(
         try:
             from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
-            # 멀티모달 문서 처리
             result = hybrid_rag_processor.process_document(
-                content=content,
-                filename=filename,
                 user_id=user_id,
                 room_id=room_id
             )

 from typing import Optional, List
 import logging
 import time
+import os
+import uuid
 from ...models.schemas import (
     DocumentUploadResponse, RAGQueryRequest, RAGQueryResponse,
     DocumentProcessResponse, MultimodalRAGResponse
 )
+from ...services.session_registry import set_user_for_room
 logger = logging.getLogger(__name__)
 router = APIRouter()
     try:
         start_time = time.time()
+        # 파일 읽기 및 임시 저장 (파일 경로 기반 처리기 호환)
         content = await file.read()
         filename = file.filename
+        temp_dir = os.path.join("data", "uploads")
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+        temp_path = os.path.join(temp_dir, temp_name)
+        with open(temp_path, "wb") as f:
+            f.write(content)
+        # 문서 처리기 사용 (우선 RAG에 저장 포함 경로)
         try:
+            from lily_llm_core.rag_processor import rag_processor
+            document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
+            result = rag_processor.process_and_store_document(
                 user_id=user_id,
+                document_id=document_id,
+                file_path=temp_path,
             )
+            # 업로드 시 방-사용자 매핑 저장 (후속 생성에서 자동 보정)
+            try:
+                set_user_for_room(room_id, user_id)
+            except Exception:
+                pass
             if result.get("success"):
                 processing_time = time.time() - start_time
                 return DocumentUploadResponse(
                     success=True,
+                    document_id=result.get("document_id", document_id),
                     message="문서 업로드 및 처리 완료",
                     chunks=result.get("chunks", 0),
                     latex_count=result.get("latex_count", 0),
                 )
         except ImportError:
+            # 폴백: 순수 문서 파서로 처리만 수행
+            try:
+                from lily_llm_core.document_processor import document_processor
+                docs = document_processor.process_document(temp_path)
+                processing_time = time.time() - start_time
+                return DocumentUploadResponse(
+                    success=True,
+                    document_id="",
+                    message="문서 업로드 및 처리 완료 (벡터 저장 미수행)",
+                    chunks=len(docs) if docs else 0,
+                    latex_count=0,
+                    auto_response=None
+                )
+            except Exception as e:
+                return DocumentUploadResponse(
+                    success=False,
+                    document_id="",
+                    message="문서 처리기 import 실패",
+                    error=str(e)
+                )
     except Exception as e:
         logger.error(f"문서 업로드 실패: {e}")
         results = []
         try:
+            from lily_llm_core.rag_processor import rag_processor
             for file in files:
                 content = await file.read()
                 filename = file.filename
+                # 임시 저장 후 RAG에 저장 포함 처리
+                temp_dir = os.path.join("data", "uploads")
+                os.makedirs(temp_dir, exist_ok=True)
+                temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+                temp_path = os.path.join(temp_dir, temp_name)
+                with open(temp_path, "wb") as f:
+                    f.write(content)
+                document_id = f"doc_{int(time.time()*1000)}_{uuid.uuid4().hex}"
+                result = rag_processor.process_and_store_document(
                     user_id=user_id,
+                    document_id=document_id,
+                    file_path=temp_path,
                 )
                 results.append({
                     "filename": filename,
                     "success": result.get("success", False),
+                    "document_id": result.get("document_id", document_id),
                     "chunks": result.get("chunks", 0),
                     "error": result.get("error")
                 })
             }
         except ImportError:
+            # 폴백: 저장 없이 처리만 수행
+            try:
+                from lily_llm_core.document_processor import document_processor
+                for file in files:
+                    content = await file.read()
+                    filename = file.filename
+                    temp_dir = os.path.join("data", "uploads")
+                    os.makedirs(temp_dir, exist_ok=True)
+                    temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+                    temp_path = os.path.join(temp_dir, temp_name)
+                    with open(temp_path, "wb") as f:
+                        f.write(content)
+                    docs = document_processor.process_document(temp_path)
+                    results.append({
+                        "filename": filename,
+                        "success": bool(docs),
+                        "document_id": "",
+                        "chunks": len(docs) if docs else 0,
+                        "error": None if docs else "processing failed"
+                    })
+                processing_time = time.time() - start_time
+                return {
+                    "status": "success",
+                    "results": results,
+                    "total_files": len(files),
+                    "processing_time": processing_time
+                }
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=str(e))
     except Exception as e:
         logger.error(f"일괄 문서 처리 실패: {e}")
         try:
             from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
+            # 임시 저장 후 파일 경로 기반 처리
+            temp_dir = os.path.join("data", "uploads")
+            os.makedirs(temp_dir, exist_ok=True)
+            temp_name = f"{int(time.time()*1000)}_{uuid.uuid4().hex}_{filename}"
+            temp_path = os.path.join(temp_dir, temp_name)
+            with open(temp_path, "wb") as f:
+                f.write(content)
             result = hybrid_rag_processor.process_document(
+                file_path=temp_path,
                 user_id=user_id,
                 room_id=room_id
             )

lily_llm_api/api/routers/generation_router.py CHANGED Viewed

@@ -10,6 +10,7 @@ from ...models.schemas import GenerateResponse, MultimodalGenerateResponse
 from ...services.generation_service import generate_sync
 from ...services.model_service import is_model_loaded
 from ...utils.system_utils import select_model_interactive
 logger = logging.getLogger(__name__)
 router = APIRouter()
@@ -31,12 +32,27 @@ async def generate(request: Request,
     start_time = time.time()
     # 세션 ID가 없으면 자동 생성 (채팅방별 고유 세션)
     if not session_id:
-        # 채팅방 + 사용자 + 타임스탬프 기반으로 고유한 세션 생성
         timestamp = int(time.time())
         session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
         print(f"🔍 [DEBUG] 자동 세션 ID 생성: {session_id} (채팅방: {room_id}, 사용자: {user_id})")
     if use_context:
         try:

 from ...services.generation_service import generate_sync
 from ...services.model_service import is_model_loaded
 from ...utils.system_utils import select_model_interactive
+from ...services.session_registry import get_user_for_room, set_user_for_room, set_user_for_session
 logger = logging.getLogger(__name__)
 router = APIRouter()
     start_time = time.time()
+    # 사용자가 비어있거나 anonymous면 룸 기반 최근 사용자 보정
+    if not user_id or user_id == "anonymous":
+        try:
+            recovered_user = get_user_for_room(room_id)
+            if recovered_user:
+                print(f"🔍 [DEBUG] 룸 기반 사용자 보정: {user_id} -> {recovered_user} (room={room_id})")
+                user_id = recovered_user
+        except Exception:
+            pass
     # 세션 ID가 없으면 자동 생성 (채팅방별 고유 세션)
     if not session_id:
         timestamp = int(time.time())
         session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
         print(f"🔍 [DEBUG] 자동 세션 ID 생성: {session_id} (채팅방: {room_id}, 사용자: {user_id})")
+    else:
+        # 제공된 세션에도 사용자 매핑 저장
+        try:
+            set_user_for_session(session_id, user_id)
+        except Exception:
+            pass
     if use_context:
         try:

lily_llm_api/app.py CHANGED Viewed

@@ -1490,7 +1490,7 @@ async def manual_cleanup_all_sessions():
     except Exception as e:
         return {"status": "error", "message": str(e)}
-@app.post("/generate", response_model=GenerateResponse)
 async def generate(request: Request,
                   prompt: str = Form(...),
                   image1: UploadFile = File(None),
@@ -1634,7 +1634,7 @@ async def generate_multimodal(prompt: str = Form(...),
-@app.get("/models")
 async def list_models():
     """사용 가능한 모델 목록"""
     return {

     except Exception as e:
         return {"status": "error", "message": str(e)}
+@app.post("/api/v2/generate", response_model=GenerateResponse)
 async def generate(request: Request,
                   prompt: str = Form(...),
                   image1: UploadFile = File(None),
+@app.get("/api/v2/models")
 async def list_models():
     """사용 가능한 모델 목록"""
     return {

lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED Viewed

@@ -246,7 +246,7 @@ class Kanana15V3bInstructProfile:
             "<|-im_end|>",  # 🔄 잘못된 토큰 추가
             "<image>",
             "user\n",
-            "assistant\n"
         ]
         for pattern in patterns_to_remove:

             "<|-im_end|>",  # 🔄 잘못된 토큰 추가
             "<image>",
             "user\n",
+            "assistant\n"
         ]
         for pattern in patterns_to_remove:

lily_llm_api/services/generation_service.py CHANGED Viewed

@@ -3,13 +3,26 @@ Generation service for Lily LLM API
 """
 import logging
 import time
-from typing import Optional, List
 from PIL import Image
 import io
 import torch
 logger = logging.getLogger(__name__)
 def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
                  temperature: Optional[float] = None, top_p: Optional[float] = None,
                  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
@@ -21,6 +34,31 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
         current_profile = get_current_profile()
         current_model = get_current_model()
         print(f"🔍 [DEBUG] generate_sync 시작 - prompt 길이: {len(prompt)}")
         print(f"🔍 [DEBUG] 현재 로드된 모델: {current_profile.display_name if current_profile else 'None'}")
@@ -47,16 +85,63 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
         if image_data_list and len([img for img in image_data_list if img]) > 0:
             all_image_data.extend(image_data_list)
             print(f"🔍 [DEBUG] 직접 전달된 이미지 {len(image_data_list)}개 추가")
         if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
             print(f"🔍 [DEBUG] 이미지 처리 시작 - 총 이미지 개수: {len([img for img in all_image_data if img])}")
             # 🔄 공식 방식: 간단한 이미지 처리
-            max_images = min(len(all_image_data), 4)
             logger.info(f"🖼️  멀티모달 처리 시작... (이미지 {max_images}개)")
             try:
                 metas_list = []
                 for idx, image_bytes in enumerate(all_image_data[:max_images]):
                     if image_bytes:
                         try:
@@ -64,7 +149,6 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                             # 🔄 공식 이미지 프로세서 사용
                             if processor and hasattr(processor, 'image_processor'):
                                 processed = processor.image_processor(pil_image)
-                                all_pixel_values.append(processed["pixel_values"])
                                 metas_list.append(processed.get("image_meta", {}))
                             else:
                                 logger.warning(f"⚠️ 이미지 프로세서를 찾을 수 없음")
@@ -79,6 +163,28 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                     print(f"🔍 [DEBUG] 이미지 메타데이터: {combined_image_metas}")
                 else:
                     combined_image_metas = {}
             except Exception as e:
                 logger.error(f"❌ 이미지 전처리 실패: {e}")
                 combined_image_metas = {}
@@ -103,7 +209,17 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                 except Exception as e:
                     print(f"⚠️ [DEBUG] 컨텍스트 로드 실패: {e}")
                     context_prompt = ""
             except Exception as e:
                 print(f"⚠️ [DEBUG] 컨텍스트 로드 실패: {e} (세션: {session_id})")
                 context_prompt = ""
@@ -113,9 +229,13 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
         # 🔄 멀티모달 프롬프트 구성 (공식 방식)
         if all_pixel_values and len(all_pixel_values) > 0:
-            # 🔄 공식 Kanana 형식: Human: <image> 텍스트
-            formatted_prompt = f"Human: <image>{prompt}"
             print(f"🔍 [DEBUG] 멀티모달 프롬프트 구성 (공식 형식): {formatted_prompt}")
             image_processed = True
         else:
             image_processed = False
@@ -180,32 +300,106 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
             if hasattr(tokenizer, 'encode_prompt'):
                 print(f"🔍 [DEBUG] encode_prompt 메서드 사용")
-                # 안전한 메타데이터 생성
-                safe_image_meta = {
-                    'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
-                    'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
-                }
                 try:
                     inputs = tokenizer.encode_prompt(
                         prompt=formatted_prompt,
-                        max_length=2048,
-                        image_meta=safe_image_meta
                     )
                     if 'seq_length' in inputs:
                         del inputs['seq_length']
-                    input_ids = inputs['input_ids']
-                    attention_mask = inputs['attention_mask']
-                    # 튜플인 경우 첫 번째 요소 사용
-                    if isinstance(input_ids, tuple):
-                        input_ids = input_ids[0]
-                    if isinstance(attention_mask, tuple):
-                        attention_mask = attention_mask[0]
                 except Exception as e:
                     print(f"❌ [DEBUG] encode_prompt 실패: {e}, 폴백 사용")
                     # 폴백: 기본 토크나이저 사용
@@ -257,6 +451,11 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
         print(f"🔍 [DEBUG] 최종 input_ids shape: {input_ids.shape}")
         print(f"🔍 [DEBUG] 입력 토큰 수: {input_ids.shape[1]}")
         # --- 4. 생성 설정 ---
         print(f"🔍 [DEBUG] 생성 설정 구성 시작")
         gen_config = current_profile.get_generation_config()
@@ -316,9 +515,20 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                     print(f"🔍 [DEBUG] 멀티모달 추론 실행")
                     print(f"🔍 [DEBUG] 이미지 텐서 개수: {len(all_pixel_values)}")
-                    # 이미지 텐서도 디바이스 확인
                     pixel_values = torch.cat(all_pixel_values, dim=0)
                     print(f"🔍 [DEBUG] 결합된 이미지 텐서 shape: {pixel_values.shape}")
                     print(f"🔍 [DEBUG] 이미지 텐서 dtype: {pixel_values.dtype}")
                     # 🔄 모델과 동일한 dtype으로 변환 (성능 최적화)
@@ -342,6 +552,82 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                     print(f"🔍 [DEBUG] 최종 이미지 텐서 dtype: {pixel_values.dtype}")
                     print(f"🔍 [DEBUG] 모델 생성 시작 - 멀티모달")
                     # LoRA 어댑터가 적용된 모델인지 확인
                     try:
                         from lily_llm_core.lora_manager import lora_manager
@@ -351,79 +637,69 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
                             lora_model = lora_manager.get_model()
                             if lora_model:
                                 print(f"🔍 [DEBUG] LoRA 모델로 멀티모달 생성 실행")
-                                # 🔄 image_metas 파라미터 추가 (공식 방식)
-                                processed_image_metas = {}
-                                # 🔄 공식 방식: vision_grid_thw를 텐서로 변환
-                                if 'vision_grid_thw' in combined_image_metas:
-                                    vision_grid = combined_image_metas['vision_grid_thw']
-                                    if isinstance(vision_grid, list):
-                                        # 🔄 Kanana 모델 요구사항: 배치 차원을 맞춤
-                                        if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
-                                            # [(1, 34, 52)] -> (1, 1, 34, 52) 텐서로 변환 (배치 차원 추가)
-                                            t, h, w = vision_grid[0]
-                                            # 🔄 4차원 텐서로 변환: (batch_size, T, H, W) 형태
-                                            processed_image_metas['vision_grid_thw'] = torch.tensor([[[t, h, w]]], dtype=torch.long)
-                                            print(f"🔍 [DEBUG] vision_grid_thw 텐서 변환: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
-                                        else:
-                                            # 🔄 다른 형태의 경우 배치 차원 추가
-                                            processed_image_metas['vision_grid_thw'] = torch.tensor([vision_grid], dtype=torch.long)
-                                            print(f"🔍 [DEBUG] vision_grid_thw 텐서 변환 (기본): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
-                                    else:
-                                        # 텐서인 경우 배치 차원 확인 및 추가
-                                        if len(vision_grid.shape) == 3:
-                                            processed_image_metas['vision_grid_thw'] = vision_grid.unsqueeze(0)
-                                        else:
-                                            processed_image_metas['vision_grid_thw'] = vision_grid
-                                # 🔄 다른 메타데이터도 배치 차원 맞춤
-                                for key, value in combined_image_metas.items():
-                                    if key != 'vision_grid_thw':
-                                        if isinstance(value, list):
-                                            # 리스트인 경우 배치 차원 추가
-                                            processed_image_metas[key] = [value]
-                                        elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
-                                            # 2차원 텐서인 경우 배치 차원 추가
-                                            processed_image_metas[key] = value.unsqueeze(0)
-                                        else:
-                                            processed_image_metas[key] = value
                                 generate_kwargs = {
                                     'input_ids': input_ids,
                                     'attention_mask': attention_mask,
                                     'pixel_values': pixel_values,
-                                    'image_metas': processed_image_metas,  # 🔄 처리된 이미지 메타데이터
                                     **gen_config
                                 }
                                 print(f"🔍 [DEBUG] LoRA 모델 생성 파라미터: {list(generate_kwargs.keys())}")
                                 print(f"🔍 [DEBUG] 처리된 image_metas: {list(processed_image_metas.keys())}")
                                 print(f"🔍 [DEBUG] 모델 생성 시작... (타임아웃 없음)")
-                                generated_ids = lora_model.generate(**generate_kwargs)
                             else:
                                 print(f"⚠️ [DEBUG] LoRA 모델을 가져올 수 없음, 기본 모델 사용")
-                                generated_ids = current_model.generate(
-                                    input_ids=input_ids,
-                                    attention_mask=attention_mask,
-                                    pixel_values=pixel_values,
                                     **gen_config
-                                )
                         else:
                             print(f"🔍 [DEBUG] LoRA 어댑터 없음 (멀티모달), 기본 모델 사용")
-                            generated_ids = current_model.generate(
-                                input_ids=input_ids,
-                                attention_mask=attention_mask,
-                                pixel_values=pixel_values,
                                 **gen_config
-                            )
                     except ImportError:
                         print(f"🔍 [DEBUG] LoRA 지원 안됨, 기본 모델 사용")
-                        generated_ids = current_model.generate(
-                            input_ids=input_ids,
-                            attention_mask=attention_mask,
-                            pixel_values=pixel_values,
                             **gen_config
-                        )
                 else:
                     # 텍스트-only: 기존 방식
@@ -574,10 +850,40 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
             traceback.print_exc()
             return {"error": f"Response extraction failed: {str(e)}"}
-        # --- 7. 결과 반환 ---
         total_time = time.time() - t_tok_start
         print(f"🔍 [DEBUG] 전체 처리 완료 - 총 소요시간: {total_time:.3f}초")
         return {
             "generated_text": response,
             "processing_time": total_time,

 """
 import logging
 import time
+from typing import Optional, List, Dict
+from pathlib import Path
+from .session_registry import get_user_for_room, get_user_for_session, set_user_for_session
 from PIL import Image
 import io
 import torch
 logger = logging.getLogger(__name__)
+# 세션별 최근 이미지 캐시 (간단한 인메모리)
+# 주의: 프로세스 재시작 시 초기화됨. 최대 4장 보관.
+_session_image_cache: Dict[str, List[bytes]] = {}
+# 선택적: 벡터 스토어에서 최근 문서 이미지 복구 지원
+try:
+    from lily_llm_core.vector_store_manager import vector_store_manager, SimpleVectorStore
+except Exception:
+    vector_store_manager = None
+    SimpleVectorStore = None
 def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
                  temperature: Optional[float] = None, top_p: Optional[float] = None,
                  do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
         current_profile = get_current_profile()
         current_model = get_current_model()
+        # 사용자 보정: session/room 기반 최근 사용자 복구
+        try:
+            if (not user_id) or (user_id == "anonymous"):
+                recovered = get_user_for_session(session_id) or get_user_for_room(room_id)
+                if recovered:
+                    print(f"🔍 [DEBUG] 사용자 보정: {user_id} -> {recovered} (room={room_id}, session={session_id})")
+                    user_id = recovered
+        except Exception:
+            pass
+        # 세션 ID 정규화: 제공되지 않거나 일회성으로 보이는 경우 룸/사용자 기반으로 고정
+        original_session_id = session_id
+        if not session_id or (isinstance(session_id, str) and session_id.startswith("room_") and session_id.count("_") >= 3):
+            # 예: room_default_user_anonymous_17559... 형태를 안정적인 키로 치환
+            stable_user = user_id or "anonymous"
+            stable_room = room_id or "default"
+            session_id = f"room_{stable_room}_{stable_user}"
+            if original_session_id and original_session_id != session_id:
+                print(f"🔍 [DEBUG] 세션 ID 정규화: {original_session_id} -> {session_id}")
+        # 세션-사용자 매핑 저장
+        try:
+            set_user_for_session(session_id, user_id)
+        except Exception:
+            pass
         print(f"🔍 [DEBUG] generate_sync 시작 - prompt 길이: {len(prompt)}")
         print(f"🔍 [DEBUG] 현재 로드된 모델: {current_profile.display_name if current_profile else 'None'}")
         if image_data_list and len([img for img in image_data_list if img]) > 0:
             all_image_data.extend(image_data_list)
             print(f"🔍 [DEBUG] 직접 전달된 이미지 {len(image_data_list)}개 추가")
+        else:
+            # 현재 요청에 이미지가 없으면 세션 캐시에서 복구 시도
+            if session_id and session_id in _session_image_cache and len(_session_image_cache[session_id]) > 0:
+                cached_imgs = _session_image_cache[session_id]
+                all_image_data.extend(cached_imgs)
+                print(f"🔍 [DEBUG] 세션 캐시에서 이전 이미지 {len(cached_imgs)}개 복구 (세션: {session_id})")
+        # 추가 복구: 여전히 이미지가 없고 멀티모달이면, 최근 RAG 문서에서 이미지 바이트 복원
+        if (not all_image_data or len([img for img in all_image_data if img]) == 0) and getattr(current_profile, 'multimodal', False):
+            try:
+                if vector_store_manager is not None:
+                    # 사용자 문서 목록 가져오기 (최신순 정렬)
+                    user_docs = vector_store_manager.get_user_documents(user_id)
+                    if user_docs:
+                        # last_updated > created_at 우선 사용
+                        def _ts(d: Dict):
+                            return d.get('last_updated') or d.get('created_at') or 0
+                        user_docs.sort(key=_ts, reverse=True)
+                        latest_doc_id = user_docs[0].get('document_id')
+                        base_path = getattr(vector_store_manager, 'base_path', Path('./vector_stores'))
+                        store_path = Path(base_path) / user_id / latest_doc_id
+                        if SimpleVectorStore is not None:
+                            store = SimpleVectorStore.load_local(str(store_path))
+                            recovered = []
+                            for doc in getattr(store, 'documents', []) or []:
+                                try:
+                                    meta = getattr(doc, 'metadata', {}) or {}
+                                    imgs = meta.get('image_data_list')
+                                    if imgs and isinstance(imgs, list):
+                                        # bytes 만 필터링
+                                        recovered.extend([b for b in imgs if isinstance(b, (bytes, bytearray)) and len(b) > 0])
+                                except Exception:
+                                    continue
+                            if recovered:
+                                all_image_data.extend(recovered[:4])
+                                print(f"🔍 [DEBUG] RAG에서 이미지 복구: {len(recovered)}개 (사용: {len(all_image_data)})")
+                        else:
+                            print("⚠️ [DEBUG] SimpleVectorStore 사용 불가 - 이미지 복구 생략")
+                    else:
+                        print("⚠️ [DEBUG] 사용자 문서가 없어 이미지 복구 불가")
+                else:
+                    print("⚠️ [DEBUG] vector_store_manager 미사용 - 이미지 복구 비활성화")
+            except Exception as e:
+                print(f"⚠️ [DEBUG] RAG 기반 이미지 복구 실패: {e}")
+        # 항상 참조 가능한 max_images 정의 (이미지 없으면 0)
+        max_images = min(len([img for img in all_image_data if img]) if all_image_data else 0, 4)
         if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
             print(f"🔍 [DEBUG] 이미지 처리 시작 - 총 이미지 개수: {len([img for img in all_image_data if img])}")
             # 🔄 공식 방식: 간단한 이미지 처리
             logger.info(f"🖼️  멀티모달 처리 시작... (이미지 {max_images}개)")
             try:
                 metas_list = []
+                # 먼저 메타데이터만 수집
                 for idx, image_bytes in enumerate(all_image_data[:max_images]):
                     if image_bytes:
                         try:
                             # 🔄 공식 이미지 프로세서 사용
                             if processor and hasattr(processor, 'image_processor'):
                                 processed = processor.image_processor(pil_image)
                                 metas_list.append(processed.get("image_meta", {}))
                             else:
                                 logger.warning(f"⚠️ 이미지 프로세서를 찾을 수 없음")
                     print(f"🔍 [DEBUG] 이미지 메타데이터: {combined_image_metas}")
                 else:
                     combined_image_metas = {}
+                # 이제 이미지 크기를 조정하여 pixel_values 생성
+                for idx, image_bytes in enumerate(all_image_data[:max_images]):
+                    if image_bytes:
+                        try:
+                            pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+                            # 🔄 공식 이미지 프로세서 사용
+                            if processor and hasattr(processor, 'image_processor'):
+                                # KananaVImageProcessor는 기본 파라미터만 지원
+                                processed = processor.image_processor(pil_image)
+                                pixel_values = processed["pixel_values"]
+                                # NOTE: pixel_values 및 메타데이터는 프로세서 산출 그대로 사용 (임의 조정 금지)
+                                #       모델 내부 prepare_mm_inputs가 일관성 있게 처리하도록 맡김
+                                all_pixel_values.append(pixel_values)
+                                metas_list.append(processed.get("image_meta", {}))
+                                # 중복 코드 제거 - 이미 위에서 처리됨
+                            else:
+                                logger.warning(f"⚠️ 이미지 프로세서를 찾을 수 없음")
+                        except Exception as e:
+                            logger.warning(f"⚠️ 이미지 {idx} 처리 실패: {e}")
             except Exception as e:
                 logger.error(f"❌ 이미지 전처리 실패: {e}")
                 combined_image_metas = {}
                 except Exception as e:
                     print(f"⚠️ [DEBUG] 컨텍스트 로드 실패: {e}")
                     context_prompt = ""
+                # 이미지 바이트를 세션 캐시에 보관 (다음 턴에 재사용)
+                if session_id:
+                    # 원본 요청에 이미지가 있었다면 그걸 우선 보관, 없으면 복구된 이미지 유지
+                    if image_data_list and len([img for img in image_data_list if img]) > 0:
+                        _session_image_cache[session_id] = list(image_data_list[:max_images])
+                    elif session_id not in _session_image_cache:
+                        _session_image_cache[session_id] = list(all_image_data[:max_images])
+                    # 최대 4장 제한
+                    _session_image_cache[session_id] = _session_image_cache[session_id][:4]
+                    print(f"🔍 [DEBUG] 세션 캐시 업데이트: {len(_session_image_cache[session_id])}장 저장 (세션: {session_id})")
             except Exception as e:
                 print(f"⚠️ [DEBUG] 컨텍스트 로드 실패: {e} (세션: {session_id})")
                 context_prompt = ""
         # 🔄 멀티모달 프롬프트 구성 (공식 방식)
         if all_pixel_values and len(all_pixel_values) > 0:
+            # 🔄 공식 Kanana 형식: 이미지 개수에 맞게 <image> 토큰 생성
+            num_images = len(all_pixel_values)
+            image_tokens = "<image>" * num_images  # 이미지 개수만큼 <image> 토큰 생성
+            # 답변 유도를 위해 Assistant 프리픽스 추가
+            formatted_prompt = f"Human: {image_tokens}{prompt}\nAssistant:"
             print(f"🔍 [DEBUG] 멀티모달 프롬프트 구성 (공식 형식): {formatted_prompt}")
+            print(f"🔍 [DEBUG] 이미지 토큰 생성: {num_images}개 이미지 -> {image_tokens}")
             image_processed = True
         else:
             image_processed = False
             if hasattr(tokenizer, 'encode_prompt'):
                 print(f"🔍 [DEBUG] encode_prompt 메서드 사용")
+                print(f"🔍 [DEBUG] combined_image_metas: {combined_image_metas}")
+                print(f"🔍 [DEBUG] 총 이미지 개수: {len(all_image_data)}")
+                # 🔄 메타데이터 검증 및 안전화
+                safe_image_meta = {}
+                if combined_image_metas:
+                    # image_token_thw 배열 길이 검증
+                    if 'image_token_thw' in combined_image_metas:
+                        image_token_thw = combined_image_metas['image_token_thw']
+                        if isinstance(image_token_thw, list) and len(image_token_thw) > 0:
+                            # 배열 길이가 이미지 개수와 일치하는지 확인
+                            if len(image_token_thw) == len(all_pixel_values):
+                                # 🔄 추가 검증: 각 배열 요소가 유효한지 확인
+                                valid_meta = True
+                                for i, thw in enumerate(image_token_thw):
+                                    if not isinstance(thw, (list, tuple)) or len(thw) != 3:
+                                        print(f"⚠️ [DEBUG] 메타데이터 요소 {i}가 유효하지 않음: {thw}")
+                                        valid_meta = False
+                                        break
+                                if valid_meta:
+                                    safe_image_meta = combined_image_metas
+                                    print(f"🔍 [DEBUG] 메타데이터 검증 통과: {len(image_token_thw)}개 이미지")
+                                else:
+                                    print(f"⚠️ [DEBUG] 메타데이터 요소 검증 실패, 기본값 사용")
+                                    safe_image_meta = {
+                                        'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
+                                        'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
+                                    }
+                            else:
+                                print(f"⚠️ [DEBUG] 메타데이터 불일치: 이미지 {len(all_pixel_values)}개, 메타 {len(image_token_thw)}개")
+                                # 안전한 기본값 사용
+                                safe_image_meta = {
+                                    'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
+                                    'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
+                                }
+                        else:
+                            print(f"⚠️ [DEBUG] image_token_thw가 유효하지 않음, 기본값 사용")
+                            safe_image_meta = {
+                                'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
+                                'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
+                            }
+                    else:
+                        print(f"⚠️ [DEBUG] image_token_thw 없음, 기본값 생성")
+                        safe_image_meta = {
+                            'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
+                            'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
+                        }
+                else:
+                    print(f"⚠️ [DEBUG] combined_image_metas 없음, 기본값 생성")
+                    safe_image_meta = {
+                        'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
+                        'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
+                    }
+                print(f"🔍 [DEBUG] 안전화된 메타데이터: {safe_image_meta}")
+                # 🔄 안전한 메타데이터로 encode_prompt 호출
                 try:
+                    # 🔄 추가 안전장치: 메타데이터 복사본 생성
+                    final_meta = {}
+                    for key, value in safe_image_meta.items():
+                        if isinstance(value, list):
+                            final_meta[key] = value.copy()  # 복사본 생성
+                        else:
+                            final_meta[key] = value
+                    print(f"🔍 [DEBUG] 최종 메타데이터: {final_meta}")
+                    # 🔄 공식 방식: max_length 파라미터 추가
                     inputs = tokenizer.encode_prompt(
                         prompt=formatted_prompt,
+                        max_length=2048,  # 공식 코드와 동일
+                        image_meta=final_meta
                     )
+                    print(f"🔍 [DEBUG] encode_prompt 출력: {list(inputs.keys())}")
+                    # 🔄 encode_prompt 출력 정규화 (seq_length 제거)
                     if 'seq_length' in inputs:
+                        print(f"🔍 [DEBUG] seq_length 제거됨")
                         del inputs['seq_length']
+                    # 🔄 input_ids 안전하게 추출 (공식 방식)
+                    if isinstance(inputs['input_ids'], tuple):
+                        print(f"🔍 [DEBUG] input_ids가 튜플임: {len(inputs['input_ids'])}개 요소")
+                        input_ids = inputs['input_ids'][0]  # 첫 번째 요소 사용
+                        print(f"🔍 [DEBUG] input_ids 튜플에서 첫 번째 요소 추출: {input_ids.shape}")
+                    else:
+                        input_ids = inputs['input_ids']
+                    # 🔄 attention_mask도 안전하게 추출
+                    if isinstance(inputs['attention_mask'], tuple):
+                        print(f"🔍 [DEBUG] attention_mask가 튜플임: {len(inputs['attention_mask'])}개 요소")
+                        attention_mask = inputs['attention_mask'][0]  # 첫 번째 요소 사용
+                        print(f"🔍 [DEBUG] attention_mask 튜플에서 첫 번째 요소 추출: {attention_mask.shape}")
+                    else:
+                        attention_mask = inputs['attention_mask']
+                    # 🔄 최종 검증
+                    print(f"🔍 [DEBUG] 최종 input_ids 타입: {type(input_ids)}, shape: {input_ids.shape}")
+                    print(f"🔍 [DEBUG] 최종 attention_mask 타입: {type(attention_mask)}, shape: {attention_mask.shape}")
                 except Exception as e:
                     print(f"❌ [DEBUG] encode_prompt 실패: {e}, 폴백 사용")
                     # 폴백: 기본 토크나이저 사용
         print(f"🔍 [DEBUG] 최종 input_ids shape: {input_ids.shape}")
         print(f"🔍 [DEBUG] 입력 토큰 수: {input_ids.shape[1]}")
+        # 🔄 멀티모달: -1 토큰은 모델 내부에서 시각 임베딩으로 대체되므로 유지
+        negative_mask = input_ids < 0
+        if negative_mask.any():
+            print(f"🔍 [DEBUG] -1 토큰 유지: {negative_mask.sum().item()}개")
         # --- 4. 생성 설정 ---
         print(f"🔍 [DEBUG] 생성 설정 구성 시작")
         gen_config = current_profile.get_generation_config()
                     print(f"🔍 [DEBUG] 멀티모달 추론 실행")
                     print(f"🔍 [DEBUG] 이미지 텐서 개수: {len(all_pixel_values)}")
+                    # 이미지 텐서는 프로세서 산출값을 그대로 결합 (임의 조정 금지)
                     pixel_values = torch.cat(all_pixel_values, dim=0)
                     print(f"🔍 [DEBUG] 결합된 이미지 텐서 shape: {pixel_values.shape}")
+                    # 디버깅: 이미지별 토큰 오프셋 범위 출력
+                    try:
+                        offsets = []
+                        start = 0
+                        for i, img_t in enumerate(all_pixel_values):
+                            end = start + img_t.shape[0]
+                            offsets.append((start, end))
+                            start = end
+                        print(f"🔍 [DEBUG] 이미지별 토큰 범위: {offsets}")
+                    except Exception as _e:
+                        print(f"⚠️ [DEBUG] 이미지 오프셋 계산 실패: {_e}")
                     print(f"🔍 [DEBUG] 이미지 텐서 dtype: {pixel_values.dtype}")
                     # 🔄 모델과 동일한 dtype으로 변환 (성능 최적화)
                     print(f"🔍 [DEBUG] 최종 이미지 텐서 dtype: {pixel_values.dtype}")
                     print(f"🔍 [DEBUG] 모델 생성 시작 - 멀티모달")
+                    # 🔄 공통 이미지 메타데이터 처리 (모든 모델 경로에서 동일하게)
+                    processed_image_metas = {}
+                    # 🔄 vision_grid_thw를 텐서로 변환 (모델 내부 슬라이싱 호환성)
+                    if 'vision_grid_thw' in combined_image_metas:
+                        vision_grid = combined_image_metas['vision_grid_thw']
+                        if isinstance(vision_grid, list):
+                            # 리스트를 텐서로 변환: [(1, 34, 52), (1, 14, 36)] -> tensor([[1, 34, 52], [1, 14, 36]])
+                            _vg = torch.tensor(vision_grid, dtype=torch.long)
+                            # rot_pos_emb는 [N,3] 또는 리스트[(t,h,w)]를 기대하므로 배치 차원 없이 전달
+                            processed_image_metas['vision_grid_thw'] = _vg  # [N, 3]
+                            print(f"🔍 [DEBUG] 공통 - vision_grid_thw 텐서로 변환: {processed_image_metas['vision_grid_thw'].shape}")
+                        else:
+                            processed_image_metas['vision_grid_thw'] = vision_grid
+                            print(f"🔍 [DEBUG] 공통 - vision_grid_thw 원본 형태 유지: {vision_grid}")
+                    # 🔄 다른 메타데이터도 배치 차원 맞춤
+                    for key, value in combined_image_metas.items():
+                        if key != 'vision_grid_thw':
+                            if isinstance(value, list):
+                                # 리스트인 경우 올바른 형태로 변환
+                                if key == 'image_token_thw':
+                                    # image_token_thw는 [1, N, 3]로 전달 (모델 기대 형태)
+                                    _thw = torch.tensor(value, dtype=torch.long)
+                                    processed_image_metas[key] = _thw.unsqueeze(0)
+                                else:
+                                    # 다른 메타데이터는 기존 방식
+                                    processed_image_metas[key] = [value]
+                            elif isinstance(value, torch.Tensor) and len(value.shape) == 2:
+                                # 2차원 텐서인 경우 배치 차원 추가
+                                processed_image_metas[key] = value.unsqueeze(0)
+                            else:
+                                processed_image_metas[key] = value
+                    # 🔄 참고 로그만 출력: 이미지 토큰 수 추정 (조정은 하지 않음)
+                    if 'image_token_thw' in processed_image_metas:
+                        image_token_thw = processed_image_metas['image_token_thw']
+                        if isinstance(image_token_thw, torch.Tensor):
+                            total_image_tokens = 0
+                            print(f"🔍 [DEBUG] image_token_thw shape: {image_token_thw.shape}")
+                            print(f"🔍 [DEBUG] image_token_thw 내용: {image_token_thw}")
+                            for i in range(image_token_thw.shape[0]):
+                                token_info = image_token_thw[i]
+                                if len(token_info) == 3:
+                                    t, h, w = token_info
+                                    total_image_tokens += t * h * w
+                                elif len(token_info) == 2:
+                                    h, w = token_info
+                                    total_image_tokens += h * w
+                            print(f"🔍 [DEBUG] 계산된 총 이미지 토큰 수(참고): {total_image_tokens}")
+                            if isinstance(total_image_tokens, torch.Tensor):
+                                total_image_tokens = total_image_tokens.sum().item()
+                            print(f"🔍 [DEBUG] pixel_values 길이: {pixel_values.shape[0]}, 예상: {total_image_tokens} (조정 안함)")
+                    # 안전 가드: vision_grid_thw가 [1, N, 3]로 오면 [N, 3]로 변환
+                    try:
+                        if isinstance(processed_image_metas.get('vision_grid_thw', None), torch.Tensor):
+                            _vg = processed_image_metas['vision_grid_thw']
+                            if _vg.dim() == 3 and _vg.shape[0] == 1 and _vg.shape[-1] == 3:
+                                processed_image_metas['vision_grid_thw'] = _vg.squeeze(0)
+                                print(f"🔍 [DEBUG] vision_grid_thw 배치 차원 제거: {processed_image_metas['vision_grid_thw'].shape}")
+                    except Exception as _e:
+                        print(f"⚠️ [DEBUG] vision_grid_thw 정규화 실패: {_e}")
+                    # 멀티모달 경로도 캐시 사용 활성화
+                    try:
+                        gen_config['use_cache'] = True
+                    except Exception:
+                        pass
+                    # 모델 eval 모드 전환 (성능/일관성)
+                    try:
+                        current_model.eval()
+                    except Exception:
+                        pass
                     # LoRA 어댑터가 적용된 모델인지 확인
                     try:
                         from lily_llm_core.lora_manager import lora_manager
                             lora_model = lora_manager.get_model()
                             if lora_model:
                                 print(f"🔍 [DEBUG] LoRA 모델로 멀티모달 생성 실행")
                                 generate_kwargs = {
                                     'input_ids': input_ids,
                                     'attention_mask': attention_mask,
                                     'pixel_values': pixel_values,
+                                    'image_metas': processed_image_metas,  # 🔄 공통으로 처리된 이미지 메타데이터
                                     **gen_config
                                 }
                                 print(f"🔍 [DEBUG] LoRA 모델 생성 파라미터: {list(generate_kwargs.keys())}")
                                 print(f"🔍 [DEBUG] 처리된 image_metas: {list(processed_image_metas.keys())}")
                                 print(f"🔍 [DEBUG] 모델 생성 시작... (타임아웃 없음)")
+                                try:
+                                    lora_model.eval()
+                                except Exception:
+                                    pass
+                                import torch as _torch
+                                with _torch.inference_mode():
+                                    generated_ids = lora_model.generate(**generate_kwargs)
                             else:
                                 print(f"⚠️ [DEBUG] LoRA 모델을 가져올 수 없음, 기본 모델 사용")
+                                # 🔄 LoRA 모델을 가져올 수 없을 때도 동일한 파라미터 구조 사용 (통일성)
+                                generate_kwargs = {
+                                    'input_ids': input_ids,
+                                    'attention_mask': attention_mask,
+                                    'pixel_values': pixel_values,
+                                    'image_metas': processed_image_metas,  # 🔄 공통으로 처리된 이미지 메타데이터
                                     **gen_config
+                                }
+                                print(f"🔍 [DEBUG] LoRA 모델 없음 시 기본 모델 생성 파라미터: {list(generate_kwargs.keys())}")
+                                print(f"🔍 [DEBUG] 처리된 image_metas: {list(processed_image_metas.keys())}")
+                                import torch as _torch
+                                with _torch.inference_mode():
+                                    generated_ids = current_model.generate(**generate_kwargs)
                         else:
                             print(f"🔍 [DEBUG] LoRA 어댑터 없음 (멀티모달), 기본 모델 사용")
+                            # 🔄 기본 모델도 동일한 파라미터 구조 사용 (통일성)
+                            generate_kwargs = {
+                                'input_ids': input_ids,
+                                'attention_mask': attention_mask,
+                                'pixel_values': pixel_values,
+                                'image_metas': processed_image_metas,  # 🔄 공통으로 처리된 이미지 메타데이터
                                 **gen_config
+                            }
+                            print(f"🔍 [DEBUG] 기본 모델 생성 파라미터: {list(generate_kwargs.keys())}")
+                            print(f"🔍 [DEBUG] 처리된 image_metas: {list(processed_image_metas.keys())}")
+                            import torch as _torch
+                            with _torch.inference_mode():
+                                generated_ids = current_model.generate(**generate_kwargs)
                     except ImportError:
                         print(f"🔍 [DEBUG] LoRA 지원 안됨, 기본 모델 사용")
+                        # 🔄 ImportError 발생 시에도 동일한 파라미터 구조 사용 (통일성)
+                        generate_kwargs = {
+                            'input_ids': input_ids,
+                            'attention_mask': attention_mask,
+                            'pixel_values': pixel_values,
+                            'image_metas': processed_image_metas,  # 🔄 공통으로 처리된 이미지 메타데이터
                             **gen_config
+                        }
+                        print(f"🔍 [DEBUG] ImportError 시 기본 모델 생성 파라미터: {list(generate_kwargs.keys())}")
+                        print(f"🔍 [DEBUG] 처리된 image_metas: {list(processed_image_metas.keys())}")
+                        import torch as _torch
+                        with _torch.inference_mode():
+                            generated_ids = current_model.generate(**generate_kwargs)
                 else:
                     # 텍스트-only: 기존 방식
             traceback.print_exc()
             return {"error": f"Response extraction failed: {str(e)}"}
+        # --- 7. 컨텍스트 저장 및 결과 반환 ---
         total_time = time.time() - t_tok_start
         print(f"🔍 [DEBUG] 전체 처리 완료 - 총 소요시간: {total_time:.3f}초")
+        # 컨텍스트 누적 저장 (세션/룸 단위)
+        try:
+            if use_context and session_id:
+                try:
+                    from lily_llm_core.context_manager import context_manager
+                    if context_manager:
+                        # 사용자 메시지 저장 (이미지 여부 메타 포함)
+                        context_manager.add_user_message(
+                            prompt,
+                            metadata={
+                                "session_id": session_id,
+                                "room_id": room_id,
+                                "images_used": bool(all_image_data and len([img for img in all_image_data if img]) > 0),
+                                "num_images": len([img for img in all_image_data if img]) if all_image_data else 0,
+                            },
+                        )
+                        # 어시스턴트 메시지 저장
+                        context_manager.add_assistant_message(
+                            response,
+                            metadata={
+                                "session_id": session_id,
+                                "room_id": room_id,
+                            },
+                        )
+                        print(f"🔍 [DEBUG] 컨텍스트 저장 완료 (세션: {session_id}, 룸: {room_id})")
+                except Exception as _ctx_e:
+                    print(f"⚠️ [DEBUG] 컨텍스트 저장 실패: {_ctx_e}")
+        except Exception:
+            pass
         return {
             "generated_text": response,
             "processing_time": total_time,

lily_llm_api/services/session_registry.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""
+Simple registry mapping sessions/rooms to users.
+- Backed by process memory (reset when the service restarts)
+- Used to reconcile user_id mismatches between upload and generation
+"""
+from typing import Optional, Dict
+import time
+# room_id -> user_id; both sides are stored as str.
+_room_to_user: Dict[str, str] = {}
+# session_id -> user_id; both sides are stored as str.
+_session_to_user: Dict[str, str] = {}
+# Most recent user id handed to set_last_user(); None until one is recorded.
+_last_user: Optional[str] = None
+# time.time() value of the latest set_last_user() update; 0.0 until then.
+_last_updated_at: float = 0.0
+def set_user_for_room(room_id: Optional[str], user_id: Optional[str]) -> None:
+    if not room_id or not user_id:
+        return
+    _room_to_user[str(room_id)] = str(user_id)
+    set_last_user(user_id)
+def get_user_for_room(room_id: Optional[str]) -> Optional[str]:
+    if not room_id:
+        return None
+    return _room_to_user.get(str(room_id))
+def set_user_for_session(session_id: Optional[str], user_id: Optional[str]) -> None:
+    if not session_id or not user_id:
+        return
+    _session_to_user[str(session_id)] = str(user_id)
+    set_last_user(user_id)
+def get_user_for_session(session_id: Optional[str]) -> Optional[str]:
+    if not session_id:
+        return None
+    return _session_to_user.get(str(session_id))
+def set_last_user(user_id: Optional[str]) -> None:
+    global _last_user, _last_updated_at
+    if not user_id:
+        return
+    _last_user = str(user_id)
+    _last_updated_at = time.time()
+def get_last_user() -> Optional[str]:
+    return _last_user
+def clear() -> None:
+    _room_to_user.clear()
+    _session_to_user.clear()
+    global _last_user, _last_updated_at
+    _last_user = None
+    _last_updated_at = 0.0

lily_llm_core/document_processor.py CHANGED Viewed

@@ -436,80 +436,69 @@ class DocumentProcessor:
                         img_pil = Image.open(io.BytesIO(img_data))
                         if self._is_valid_image(img_pil):
-                        # 이미지의 위치 정보 추출 (중요!)
                             img_rect = self._get_image_rect(page, xref)
-                        if img_rect:
-                            bbox = BoundingBox(
-                                x0=img_rect.x0,
-                                y0=img_rect.y0,
-                                x1=img_rect.x1,
-                                y1=img_rect.y1
-                            )
-                            image_block = PDFBlock(
-                                block_id=f"page_{page_num + 1}_image_{img_idx}",
-                                block_type="image",
-                                bbox=bbox,
-                                content=img_data,  # 바이너리 데이터
-                                page_num=page_num + 1,
-                                metadata={
-                                    "image_size": img_pil.size,
-                                    "image_format": "PNG",
-                                    "image_mode": img_pil.mode,
-                                    "xref": xref,
-                                    "is_embedded": True
-                                }
-                            )
-                            image_blocks.append(image_block)
-                            logger.debug(f"🖼️ 이미지 블록 추출: 페이지 {page_num + 1}, "
-                                       f"위치 ({bbox.x0:.1f}, {bbox.y0:.1f}, {bbox.x1:.1f}, {bbox.y1:.1f}), "
-                                       f"크기 {img_pil.size}")
                     pix = None
                 except Exception as e:
                     logger.warning(f"⚠️ 이미지 {img_idx} 처리 실패: {e}")
-            # 2. 이미지가 없으면 전체 페이지 렌더링 (fallback)
             if not image_blocks:
-                # 현재 모델이 멀티모달인지 확인
-                try:
-                    from lily_llm_api.app_v2 import current_profile
-                    is_multimodal = hasattr(current_profile, 'multimodal') and current_profile.multimodal
-                    if is_multimodal:
-                        # 전체 페이지를 이미지로 렌더링
-                        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2배 해상도
-                        img_data = pix.tobytes("png")
-                        bbox = BoundingBox(
-                            x0=0, y0=0,
-                            x1=page.rect.width,
-                            y1=page.rect.height
-                        )
-                        image_block = PDFBlock(
-                            block_id=f"page_{page_num + 1}_fullpage",
-                            block_type="image",
-                            bbox=bbox,
-                            content=img_data,
-                            page_num=page_num + 1,
-                            metadata={
-                                "image_size": (pix.width, pix.height),
-                                "image_format": "PNG",
-                                "is_embedded": False,
-                                "is_full_page_render": True
-                            }
-                        )
-                        image_blocks.append(image_block)
-                        logger.debug(f"📄 전체 페이지 렌더링: 페이지 {page_num + 1}")
-                        pix = None
-                except ImportError:
-                    pass  # app_v2 import 실패 시 무시
         except Exception as e:
             logger.warning(f"⚠️ 페이지 {page_num + 1} 이미지 블록 추출 실패: {e}")

                         img_pil = Image.open(io.BytesIO(img_data))
                         if self._is_valid_image(img_pil):
+                            # 이미지의 위치 정보 추출 (중요!)
                             img_rect = self._get_image_rect(page, xref)
+                            if img_rect:
+                                bbox = BoundingBox(
+                                    x0=img_rect.x0,
+                                    y0=img_rect.y0,
+                                    x1=img_rect.x1,
+                                    y1=img_rect.y1
+                                )
+                                image_block = PDFBlock(
+                                    block_id=f"page_{page_num + 1}_image_{img_idx}",
+                                    block_type="image",
+                                    bbox=bbox,
+                                    content=img_data,  # 바이너리 데이터
+                                    page_num=page_num + 1,
+                                    metadata={
+                                        "image_size": img_pil.size,
+                                        "image_format": "PNG",
+                                        "image_mode": img_pil.mode,
+                                        "xref": xref,
+                                        "is_embedded": True
+                                    }
+                                )
+                                image_blocks.append(image_block)
+                                logger.debug(f"🖼️ 이미지 블록 추출: 페이지 {page_num + 1}, "
+                                           f"위치 ({bbox.x0:.1f}, {bbox.y0:.1f}, {bbox.x1:.1f}, {bbox.y1:.1f}), "
+                                           f"크기 {img_pil.size}")
                     pix = None
                 except Exception as e:
                     logger.warning(f"⚠️ 이미지 {img_idx} 처리 실패: {e}")
+            # 2. 이미지가 없으면 전체 페이지 렌더링 (fallback - 항상 수행)
             if not image_blocks:
+                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2배 해상도
+                img_data = pix.tobytes("png")
+                bbox = BoundingBox(
+                    x0=0, y0=0,
+                    x1=page.rect.width,
+                    y1=page.rect.height
+                )
+                image_block = PDFBlock(
+                    block_id=f"page_{page_num + 1}_fullpage",
+                    block_type="image",
+                    bbox=bbox,
+                    content=img_data,
+                    page_num=page_num + 1,
+                    metadata={
+                        "image_size": (pix.width, pix.height),
+                        "image_format": "PNG",
+                        "is_embedded": False,
+                        "is_full_page_render": True
+                    }
+                )
+                image_blocks.append(image_block)
+                logger.debug(f"📄 전체 페이지 렌더링: 페이지 {page_num + 1}")
+                pix = None
         except Exception as e:
             logger.warning(f"⚠️ 페이지 {page_num + 1} 이미지 블록 추출 실패: {e}")