gbrabbit's picture
Auto commit at 25-2025-08 3:12:15
0e9a45c
#!/usr/bin/env python3
"""
Lily LLM API server v2 (final version: interactive model selection restored, performance optimized)
"""
from fastapi import FastAPI, HTTPException, Request, UploadFile, File, Form, Depends, WebSocket, WebSocketDisconnect
from fastapi.security import HTTPAuthorizationCredentials
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import logging
import time
import torch
from datetime import datetime
from typing import Optional, List, Union
import asyncio
import concurrent.futures
import sys
from PIL import Image
import io
import os
import json
from pathlib import Path
import warnings
# ๐Ÿ”„ RoPE ๊ฒฝ๊ณ  ์ˆจ๊ธฐ๊ธฐ (Kanana ๋ชจ๋ธ ๋‚ด๋ถ€ ๊ตฌํ˜„ ๊ด€๋ จ)
warnings.filterwarnings("ignore", message="The attention layers in this model are transitioning")
warnings.filterwarnings("ignore", message="rotary_pos_emb will be removed")
warnings.filterwarnings("ignore", message="position_embeddings will be mandatory")
# logging ์„ค์ •์„ ๋จผ์ € ๊ตฌ์„ฑ
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
from .models import get_model_profile, list_available_models
from lily_llm_core.rag_processor import rag_processor
from lily_llm_core.document_processor import document_processor
from lily_llm_core.hybrid_prompt_generator import hybrid_prompt_generator
from lily_llm_core.database import db_manager
from lily_llm_core.auth_manager import auth_manager
from lily_llm_core.websocket_manager import connection_manager
from lily_llm_core.celery_app import (
process_document_async, generate_ai_response_async,
rag_query_async, batch_process_documents_async,
get_task_status, cancel_task
)
from lily_llm_core.performance_monitor import performance_monitor
# ์ด๋ฏธ์ง€ OCR ์ „์šฉ ๋ชจ๋“ˆ ์ถ”๊ฐ€
from lily_llm_core.image_rag_processor import image_rag_processor
from lily_llm_core.latex_rag_processor import latex_rag_processor
from lily_llm_core.vector_store_manager import vector_store_manager
# LaTeX-OCR + FAISS ํ†ตํ•ฉ ์‹œ์Šคํ…œ ์ถ”๊ฐ€
# from latex_ocr_faiss_integrated import LatexOCRFAISSIntegrated
# from latex_ocr_faiss_simple import LatexOCRFAISSSimple
# ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ํ”„๋กœ์„ธ์„œ ์ถ”๊ฐ€
from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
# ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ LoRA ๊ด€๋ฆฌ์ž ์ถ”๊ฐ€
from lily_llm_core.context_manager import get_context_manager, context_manager
# ๊ณ„์ธต์  ๋ฉ”๋ชจ๋ฆฌ ์‹œ์Šคํ…œ ์ถ”๊ฐ€
from lily_llm_core.integrated_memory_manager import integrated_memory_manager
from lily_llm_core.text_summarizer import text_summarizer, SummaryConfig
# ์ „์—ญ ๋ณ€์ˆ˜๋“ค
# Module-level state shared by the loader and the LoRA helper.
# NOTE: current_profile and model_loaded are initialised a second time further
# down in this module, next to the other loader globals.
current_model = None # currently loaded model instance
current_profile = None # currently selected model profile
model_loaded = False # whether a model has been loaded
# Optional LoRA manager import: when the module cannot be imported the server
# keeps running with LoRA disabled, and both names are bound to None so later
# references do not raise NameError.
try:
    from lily_llm_core.lora_manager import get_lora_manager, lora_manager
    LORA_AVAILABLE = True
    logger.info("โœ… LoRA ๊ด€๋ฆฌ์ž import ์„ฑ๊ณต")
except ImportError as e:
    logger.warning(f"โš ๏ธ LoRA ๊ด€๋ฆฌ์ž import ์‹คํŒจ: {e}")
    LORA_AVAILABLE = False
    lora_manager = None
    get_lora_manager = None
# ===== ๊ณตํ†ต LoRA ์„ค์ • ํ•จ์ˆ˜ =====
def setup_lora_for_model(profile, lora_manager):
    """Configure and apply the automatic LoRA adapter for ``profile``.

    The model path and model type are resolved from the profile
    (``local_path`` first, otherwise ``model_id`` together with ``is_local``).
    The already loaded main model is handed to ``lora_manager`` when it has
    no base model yet, a LoRA config is created, and the adapter named
    ``"auto_adapter"`` is applied.

    Args:
        profile: Model profile; ``local_path``, ``model_id`` and ``is_local``
            are read when present.
        lora_manager: LoRA manager object, or ``None`` when LoRA is unavailable.

    Returns:
        ``True`` when the adapter was applied, ``False`` when setup was
        skipped or failed. Exceptions are logged, never raised.
    """
    if not (LORA_AVAILABLE and lora_manager):
        logger.warning("โš ๏ธ LoRA๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ ์ž๋™ ์„ค์ • ๊ฑด๋„ˆ๋œ€")
        return False

    # (model id, local checkpoint directory, model type) for the known models.
    known_local_models = (
        ("polyglot-ko-1.3b-chat", "./lily_llm_core/models/polyglot_ko_1_3b_chat", "causal_lm"),
        ("kanana-1.5-v-3b-instruct", "./lily_llm_core/models/kanana_1_5_v_3b_instruct", "vision2seq"),
        ("polyglot-ko-5.8b-chat", "./lily_llm_core/models/polyglot_ko_5_8b_chat", "causal_lm"),
    )

    try:
        logger.info("๐Ÿ”ง LoRA ์ž๋™ ์„ค์ • ์‹œ์ž‘...")

        current_model_path = None
        model_type = "causal_lm"  # default when nothing more specific is known

        if getattr(profile, 'local_path', None):
            # An explicit local path wins; the type still depends on the id.
            current_model_path = profile.local_path
            if getattr(profile, 'model_id', None):
                is_kanana = profile.model_id == "kanana-1.5-v-3b-instruct"
                model_type = "vision2seq" if is_kanana else "causal_lm"
            logger.info(f"๐Ÿ” ๋ชจ๋ธ ํ”„๋กœํ•„์—์„œ ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ: {current_model_path}")
            logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
        elif getattr(profile, 'model_id', None):
            model_id = profile.model_id
            logger.info(f"๐Ÿ” ๋ชจ๋ธ ID ๊ธฐ๋ฐ˜ ๊ฒฝ๋กœ ๊ฒฐ์ •: {model_id}")
            if not getattr(profile, 'is_local', False):
                # Non-local profile: there is no local path, so LoRA is skipped.
                logger.info(f"๐Ÿ” ๋ฐฐํฌ ํ™˜๊ฒฝ: LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€ (HF ๋ชจ๋ธ)")
                return False
            for known_id, known_path, known_type in known_local_models:
                if model_id == known_id:
                    current_model_path, model_type = known_path, known_type
                    break
            logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
            logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ํƒ€์ž…: {model_type}")

        if not current_model_path:
            logger.warning("โš ๏ธ ํ˜„์žฌ ๋ชจ๋ธ์˜ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด LoRA ์ž๋™ ๋กœ๋“œ ๊ฑด๋„ˆ๋œ€")
            return False

        logger.info(f"๐Ÿ” LoRA ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
        logger.info(f"๐Ÿ” LoRA ๋ชจ๋ธ ํƒ€์ž…: {model_type}")

        # Reuse the main model that is already in memory instead of loading
        # a second copy for LoRA.
        logger.info("๐Ÿ”ง ๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ์— LoRA ์ง์ ‘ ์ ์šฉ ์‹œ์ž‘...")
        if hasattr(lora_manager, 'base_model') and lora_manager.base_model is None:
            from lily_llm_api.app import current_model
            if current_model is None:
                logger.warning("โš ๏ธ ๋ฉ”์ธ ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์–ด LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€")
                return False
            lora_manager.base_model = current_model
            logger.info("โœ… ๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ์„ LoRA ๊ด€๋ฆฌ์ž์— ์„ค์ • ์™„๋ฃŒ")

        logger.info("๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ ์‹œ์ž‘...")
        if model_type == "vision2seq" and "kanana" in profile.model_id:
            # Kanana: only the first language-model layer is targeted.
            layer0 = "language_model.model.layers.0"
            attention_modules = [
                f"{layer0}.self_attn.{proj}"
                for proj in ("q_proj", "k_proj", "v_proj", "o_proj")
            ]
            mlp_modules = [
                f"{layer0}.mlp.{proj}"
                for proj in ("gate_proj", "up_proj", "down_proj")
            ]
            target_modules = attention_modules + mlp_modules
        else:
            # Remaining models use the GPT-NeoX style module names.
            target_modules = ["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]

        task_type = "CAUSAL_LM" if model_type == "causal_lm" else "VISION_2_SEQ"
        lora_config = lora_manager.create_lora_config(
            r=16,
            lora_alpha=32,
            lora_dropout=0.1,
            bias="none",
            task_type=task_type,
            target_modules=target_modules
        )
        logger.info("โœ… LoRA ์„ค์ • ์ƒ์„ฑ ์™„๋ฃŒ")

        logger.info("๐Ÿ”ง LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘...")
        if not lora_manager.apply_lora_to_model("auto_adapter"):
            logger.error("โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ")
            return False

        logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: auto_adapter")
        logger.info("๐ŸŽ‰ LoRA ์ž๋™ ์„ค์ • ์™„๋ฃŒ!")
        return True
    except Exception as e:
        logger.error(f"โŒ LoRA ์ž๋™ ์„ค์ • ์ค‘ ์˜ค๋ฅ˜: {e}")
        return False
# ===== lifespan ์ปจํ…์ŠคํŠธ ๋งค๋‹ˆ์ € (์„œ๋ฒ„ ์‹œ์ž‘/์ข…๋ฃŒ ์ด๋ฒคํŠธ) =====
from contextlib import asynccontextmanager
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run server startup work, yield while serving, then clean up.

    Startup: tunes CPU thread settings, picks the startup model, loads it and
    configures the context manager (summary method and auto-cleanup, the
    latter overridable through ``LILY_CONTEXT_*`` environment variables).
    A failed model load is logged and leaves ``model_loaded`` False; the
    server still starts.

    Shutdown: the shared thread-pool executor is shut down here. Because the
    application is created with ``lifespan=``, FastAPI does not run
    ``on_event("shutdown")`` handlers, so this is the place where the
    executor actually gets released.
    """
    global model_loaded

    logger.info("๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ ์ด๋ฒคํŠธ ์‹คํ–‰ ์ค‘...")

    # CPU thread tuning is best effort; a failure must not block startup.
    try:
        configure_cpu_threads()
        logger.info("โœ… CPU ์Šค๋ ˆ๋“œ ์ตœ์ ํ™” ์™„๋ฃŒ")
    except Exception as e:
        logger.error(f"โŒ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")

    selected_model_id = select_model_interactive()
    logger.info(f"๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ์„ ํƒ๋œ ๋ชจ๋ธ: {selected_model_id}")

    try:
        # LoRA setup is performed inside the model loader, not here.
        await load_model_async(selected_model_id)
        model_loaded = True
        logger.info(f"โœ… ์„œ๋ฒ„๊ฐ€ '{current_profile.display_name}' ๋ชจ๋ธ๋กœ ์ค€๋น„๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
        logger.info(f"โœ… model_loaded ์ƒํƒœ: {model_loaded}")

        # Context-manager tuning is optional: failures only produce a warning.
        try:
            context_manager.set_summary_method("smart")
            logger.info("โœ… ๊ณ ๊ธ‰ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ • ์™„๋ฃŒ: smart ์š”์•ฝ ๋ฐฉ๋ฒ• ํ™œ์„ฑํ™”")

            # Auto-cleanup settings, overridable through the environment.
            enabled = os.getenv('LILY_CONTEXT_AUTOCLEAN_ENABLED', '1') in ['1', 'true', 'True']
            interval_turns = int(os.getenv('LILY_CONTEXT_AUTOCLEAN_TURNS', '12'))
            interval_time = int(os.getenv('LILY_CONTEXT_AUTOCLEAN_TIME', '600'))
            strategy = os.getenv('LILY_CONTEXT_CLEANUP_STRATEGY', 'smart')
            context_manager.set_auto_cleanup_config(
                enabled=enabled,
                interval_turns=interval_turns,
                interval_time=interval_time,
                strategy=strategy
            )
            logger.info("โœ… ์ž๋™ ์ •๋ฆฌ ์„ค์ • ์ตœ์ ํ™” ์™„๋ฃŒ")
        except Exception as e:
            logger.warning(f"โš ๏ธ ๊ณ ๊ธ‰ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ • ์‹คํŒจ: {e}")
    except Exception as e:
        logger.error(f"โŒ ๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค: {e}", exc_info=True)
        model_loaded = False

    logger.info("โœ… ์„œ๋ฒ„ ์‹œ์ž‘ ์ด๋ฒคํŠธ ์™„๋ฃŒ")
    try:
        yield  # the server handles requests while suspended here
    finally:
        logger.info("๐Ÿ›‘ ์„œ๋ฒ„ ์ข…๋ฃŒ ์ด๋ฒคํŠธ ์‹คํ–‰ ์ค‘...")
        # Wait for in-flight executor work, then release the worker threads.
        executor.shutdown(wait=True)
        logger.info("โœ… ์„œ๋ฒ„ ์ข…๋ฃŒ ์ด๋ฒคํŠธ ์™„๋ฃŒ")
# FastAPI ์•ฑ ์ƒ์„ฑ (lifespan ํฌํ•จ)
# Application instance; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(
    title="Lily LLM API v2",
    description="๋‹ค์ค‘ ๋ชจ๋ธ ์ง€์› LLM API ์„œ๋ฒ„",
    version="2.0.0",
    lifespan=lifespan
)
# CORS configuration.
# NOTE(review): the origin list contains the wildcard "*" together with
# allow_credentials=True. The FastAPI CORS documentation states that
# allow_origins must not be ["*"] when credentials are allowed; confirm
# whether the wildcard should be removed (or credentials disabled) before
# this is deployed outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:8001",
        "http://127.0.0.1:8001",
        "http://localhost:3000",
        "http://127.0.0.1:3000",
        "*" # allow every origin while in development
    ],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["*"],
)
# Pydantic ๋ชจ๋ธ๋“ค
class GenerateRequest(BaseModel):
    """Schema for a text-generation request.

    Only ``prompt`` is required; every other field defaults to ``None``.
    """
    prompt: str
    model_id: Optional[str] = None # no default id: the currently loaded model is used
    max_length: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    do_sample: Optional[bool] = None
class GenerateResponse(BaseModel):
    """Schema for a generation result; all four fields are required."""
    generated_text: str
    processing_time: float
    model_name: str
    image_processed: bool
class MultimodalGenerateResponse(BaseModel):
    """Schema for a multimodal generation result.

    Unlike ``GenerateResponse`` it carries an optional ``model_id`` and
    ``image_processed`` defaults to ``False``.
    """
    generated_text: str
    processing_time: float
    model_name: str
    model_id: Optional[str] = None
    image_processed: bool = False
class HealthResponse(BaseModel):
    """Schema for a health report: status, load flag, current model and model list."""
    status: str
    model_loaded: bool
    current_model: str
    available_models: List[dict]
class DocumentUploadResponse(BaseModel):
    """Schema for the outcome of a document upload.

    ``success``, ``document_id`` and ``message`` are required; the remaining
    fields are optional and default to ``None``.
    """
    success: bool
    document_id: str
    message: str
    chunks: Optional[int] = None
    latex_count: Optional[int] = None # number of LaTeX formulas
    error: Optional[str] = None
    auto_response: Optional[str] = None # automatic response text
class RAGResponse(BaseModel):
    """Schema for a RAG query result; all fields are required."""
    success: bool
    response: str
    context: str
    sources: List[dict]
    search_results: int
    processing_time: float
# ์‚ฌ์šฉ์ž ๊ด€๋ จ ์‘๋‹ต ๋ชจ๋ธ
class UserResponse(BaseModel):
    """Schema for a user-related response.

    ``success`` and ``user_id`` are required; the other fields default to ``None``.
    """
    success: bool
    user_id: str
    username: Optional[str] = None
    email: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None
class SessionResponse(BaseModel):
    """Schema for a session-related response.

    ``success`` and ``session_id`` are required; the other fields default to ``None``.
    """
    success: bool
    session_id: str
    session_name: Optional[str] = None
    created_at: Optional[str] = None
    error: Optional[str] = None
class ChatMessageResponse(BaseModel):
    """Schema for a single chat message; only ``error`` is optional."""
    success: bool
    message_id: int
    content: str
    message_type: str
    timestamp: str
    error: Optional[str] = None
# ์ธ์ฆ ๊ด€๋ จ ์‘๋‹ต ๋ชจ๋ธ
class LoginResponse(BaseModel):
    """Schema for a login result.

    Only ``success`` is required; token and user fields default to ``None``.
    """
    success: bool
    access_token: Optional[str] = None
    refresh_token: Optional[str] = None
    token_type: Optional[str] = None
    user_id: Optional[str] = None
    username: Optional[str] = None
    error: Optional[str] = None
class TokenResponse(BaseModel):
    """Schema for a token result; only ``success`` is required."""
    success: bool
    access_token: Optional[str] = None
    token_type: Optional[str] = None
    error: Optional[str] = None
# ์ „์—ญ ๋ณ€์ˆ˜
# Loader state. model, tokenizer and processor are assigned by load_model_sync.
model = None
tokenizer = None
processor = None
# NOTE: current_profile and model_loaded are also initialised near the top of
# this module; this second assignment resets them to the same initial values.
current_profile = None
model_loaded = False
image_processor = None # not assigned anywhere in the visible part of this module
# Shared thread pool; load_model_async runs the blocking loader on it.
executor = concurrent.futures.ThreadPoolExecutor()
def configure_cpu_threads():
"""CPU ์Šค๋ ˆ๋“œ ํ™˜๊ฒฝ ์ตœ์ ํ™” (vCPU ์ˆ˜์— ๋งž๊ฒŒ ์กฐ์ •)."""
print(f"๐Ÿ” [DEBUG] configure_cpu_threads ์‹œ์ž‘")
try:
# ๊ธฐ๋ณธ๊ฐ’: ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋˜๋Š” ์‹œ์Šคํ…œ CPU ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜๋˜ ๊ณผ๋„ํ•œ ์Šค๋ ˆ๋“œ ๋ฐฉ์ง€
env_threads = os.getenv("CPU_THREADS")
if env_threads is not None:
threads = max(1, int(env_threads))
else:
detected = os.cpu_count() or 2
# ์ปจํ…Œ์ด๋„ˆ/์„œ๋ฒ„์˜ vCPU ์ˆ˜๋ฅผ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•˜๋˜ ์ƒํ•œ 16 ์ ์šฉ
threads = max(1, min(detected, 16))
# OpenMP/MKL/numexpr
os.environ["OMP_NUM_THREADS"] = str(threads)
os.environ["MKL_NUM_THREADS"] = str(threads)
os.environ.setdefault("NUMEXPR_NUM_THREADS", str(threads))
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
# PyTorch ๋‚ด๋ถ€ ์Šค๋ ˆ๋“œ ์„ค์ •
try:
torch.set_num_threads(threads)
except Exception:
pass
try:
# ์—ฐ์‚ฐ ๊ฐ„ ์Šค๋ ˆ๋“œ ํ’€์€ 1~2 ๊ถŒ์žฅ(์ปจํ…์ŠคํŠธ ์Šค์œ„์นญ ๋น„์šฉ ์ ˆ๊ฐ)
torch.set_num_interop_threads(1 if threads <= 4 else 2)
except Exception:
pass
logger.info(f"๐Ÿงต CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
except Exception as e:
logger.warning(f"โš ๏ธ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
print(f"๐Ÿ” [DEBUG] configure_cpu_threads ์ข…๋ฃŒ")
def select_model_interactive(default_index: int = 1):
    """Print the available models and return the id of the startup model.

    The interactive prompt is disabled, so the choice is not read from the
    user: the entry at ``default_index`` (0-based, default 1 — the second
    listed model) is selected. When that index does not exist the first
    listed model is used instead. Previously an out-of-range index raised
    ``IndexError`` inside a retry loop that could never make progress, so the
    server hung while printing the same error forever.

    Args:
        default_index: 0-based position in the list returned by
            ``list_available_models()``.

    Returns:
        The ``model_id`` of the selected model.

    Raises:
        RuntimeError: If no models are available at all.
    """
    available_models = list_available_models()
    print("\n" + "="*60 + "\n๐Ÿค– Lily LLM API v2 - ๋ชจ๋ธ ์„ ํƒ\n" + "="*60)
    for i, model_info in enumerate(available_models, 1):
        print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")

    if not available_models:
        raise RuntimeError("No models are available; cannot select a startup model")

    if not 0 <= default_index < len(available_models):
        print(
            f"Model index {default_index} is out of range "
            f"(0-{len(available_models) - 1}); falling back to the first model."
        )
        default_index = 0

    selected_model = available_models[default_index]
    print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
    return selected_model['model_id']
# @app.on_event("startup") - FastAPI ์ตœ์‹  ๋ฒ„์ „์—์„œ ์ž‘๋™ํ•˜์ง€ ์•Š์Œ
# startup_event ํ•จ์ˆ˜๋Š” lifespan์œผ๋กœ ์ด๋™๋จ
@app.on_event("shutdown")
def shutdown_event():
    """Shut down the shared thread-pool executor, waiting for running work.

    NOTE(review): the application is constructed with a ``lifespan`` handler.
    FastAPI documents that ``on_event`` startup/shutdown handlers are not
    called when ``lifespan`` is supplied, so confirm that this hook actually
    fires.
    """
    executor.shutdown(wait=True)
async def load_model_async(model_id: str):
    """Load ``model_id`` without blocking the event loop.

    The blocking ``load_model_sync`` runs on the shared executor and this
    coroutine waits for it; any exception it raises propagates to the caller.

    Args:
        model_id: Identifier of the model profile to load.
    """
    # get_running_loop() is the supported way to obtain the loop inside a
    # coroutine; get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(executor, load_model_sync, model_id)
@app.post("/load-model")
async def load_model_endpoint(model_id: str):
    """HTTP endpoint that loads the model identified by ``model_id``.

    Always answers with a JSON object: ``{"success": True, "message": ...}``
    after a successful load, or ``{"success": False, "error": ...}`` when
    loading raised. The module-level ``model_loaded`` flag is kept in step
    with the outcome; before, it was only ever set during server startup and
    went stale after a runtime load.
    """
    global model_loaded
    try:
        logger.info(f"๐Ÿ“ฅ HTTP ์š”์ฒญ์œผ๋กœ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘: {model_id}")
        await load_model_async(model_id)
    except Exception as e:
        # The loader unloads the previous model before loading the new one,
        # so after a failure the flag must follow whatever is left in memory.
        model_loaded = model is not None
        logger.error(f"โŒ HTTP ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
    model_loaded = True
    return {"success": True, "message": f"๋ชจ๋ธ '{model_id}' ๋กœ๋“œ ์™„๋ฃŒ"}
def load_model_sync(model_id: str):
    """Synchronously load a model and its processor into the module globals.

    Any previously loaded model is released first. The new profile, model,
    processor and tokenizer are built in local variables and published to
    the globals only after loading succeeded, so a failed load leaves all
    of them as ``None`` rather than a profile without a model. Finally the
    LoRA auto-setup is run for the new profile.

    Args:
        model_id: Identifier passed to ``get_model_profile``.

    Raises:
        Exception: Whatever the profile lookup or model loading raised; the
            error and its traceback are logged before re-raising.
    """
    global model, tokenizer, processor, current_profile, current_model
    import gc
    import traceback

    try:
        if model is not None:
            logger.info("๐Ÿ—‘๏ธ ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์ค‘...")
            # Drop every module-level reference. current_model used to keep
            # the old model alive, so gc.collect() could not free it.
            model, tokenizer, processor = None, None, None
            current_model = None
            current_profile = None
            # NOTE(review): lora_manager.base_model may still reference the
            # old model (setup_lora_for_model only assigns it when it is
            # None) — confirm whether the LoRA manager needs a reset here.
            gc.collect()
            logger.info("โœ… ๊ธฐ์กด ๋ชจ๋ธ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")

        logger.info(f"๐Ÿ“ฅ '{model_id}' ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘...")
        new_profile = get_model_profile(model_id)
        # load_model() returns a (model, processor) pair.
        new_model, new_processor = new_profile.load_model()

        # Publish the fully loaded state in one go.
        current_profile = new_profile
        model, processor = new_model, new_processor
        current_model = new_model  # read by the LoRA setup
        # Use the processor's tokenizer when it has one; otherwise the
        # processor itself serves as the tokenizer.
        tokenizer = getattr(new_processor, 'tokenizer', new_processor)
        logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")

        # Automatic LoRA setup through the shared helper.
        setup_lora_for_model(current_profile, lora_manager)
    except Exception as e:
        logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
        logger.error(f"๐Ÿ” ์ „์ฒด ์—๋Ÿฌ: {traceback.format_exc()}")
        raise
def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_length: Optional[int] = None,
temperature: Optional[float] = None, top_p: Optional[float] = None,
do_sample: Optional[bool] = None, use_context: bool = True, session_id: str = None,
user_id: str = "anonymous", room_id: str = "default") -> dict:
"""[์ตœ์ ํ™”] ๋ชจ๋ธ ์ƒ์„ฑ์„ ์ฒ˜๋ฆฌํ•˜๋Š” ํ†ตํ•ฉ ๋™๊ธฐ ํ•จ์ˆ˜"""
try:
print(f"๐Ÿ” [DEBUG] generate_sync ์‹œ์ž‘ - prompt ๊ธธ์ด: {len(prompt)}")
print(f"๐Ÿ” [DEBUG] ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ชจ๋ธ: {current_profile.display_name if current_profile else 'None'}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ํƒ€์ž…: {type(current_profile) if current_profile else 'None'}")
if current_profile is None:
print("โŒ [DEBUG] ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ")
return {"error": "No model loaded"}
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ด๋ฆ„: {getattr(current_profile, 'model_name', 'Unknown')}")
print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ง€์›: {getattr(current_profile, 'multimodal', False)}")
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ”„๋กฌํ”„ํŠธ: {prompt}")
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ”„๋กฌํ”„ํŠธ ๊ธธ์ด: {len(prompt)}")
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ ์กด์žฌ ์—ฌ๋ถ€: {image_data_list is not None}")
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ ๊ฐœ์ˆ˜: {len(image_data_list) if image_data_list else 0}")
print(f"๐Ÿ” [DEBUG] ์‹ค์ œ ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ ๊ฐœ์ˆ˜: {len([img for img in image_data_list if img]) if image_data_list else 0}")
image_processed = False
all_pixel_values = []
combined_image_metas = None
# --- 1. ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ (๊ณต์‹ ๋ฐฉ์‹) ---
# ๐Ÿ”„ RAG์—์„œ ์ถ”์ถœ๋œ ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ๋„ ํฌํ•จ
all_image_data = []
if image_data_list and len([img for img in image_data_list if img]) > 0:
all_image_data.extend(image_data_list)
print(f"๐Ÿ” [DEBUG] ์ง์ ‘ ์ „๋‹ฌ๋œ ์ด๋ฏธ์ง€ {len(image_data_list)}๊ฐœ ์ถ”๊ฐ€")
# ๐Ÿ”„ RAG์—์„œ ์ถ”์ถœ๋œ ์ด๋ฏธ์ง€ ๋ฐ์ดํ„ฐ๋Š” ํ˜„์žฌ ๊ตฌํ˜„์—์„œ ์ œ๊ฑฐ๋จ (์ „์—ญ ๋ณ€์ˆ˜ ๋ฌธ์ œ ํ•ด๊ฒฐ)
if all_image_data and len([img for img in all_image_data if img]) > 0 and getattr(current_profile, 'multimodal', False):
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹œ์ž‘ - ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len([img for img in all_image_data if img])}")
# ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: ๊ฐ„๋‹จํ•œ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ
max_images = min(len(all_image_data), 4)
logger.info(f"๐Ÿ–ผ๏ธ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹œ์ž‘... (์ด๋ฏธ์ง€ {max_images}๊ฐœ)")
try:
metas_list = []
for idx, image_bytes in enumerate(all_image_data[:max_images]):
if image_bytes:
try:
pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
# ๐Ÿ”„ ๊ณต์‹ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ
if processor and hasattr(processor, 'image_processor'):
processed = processor.image_processor(pil_image)
all_pixel_values.append(processed["pixel_values"])
metas_list.append(processed.get("image_meta", {}))
else:
logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
except Exception as e:
logger.warning(f"โš ๏ธ ์ด๋ฏธ์ง€ {idx} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
# ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ํ†ตํ•ฉ (๊ณต์‹ ๋ฐฉ์‹)
if metas_list:
combined_image_metas = {}
for key in metas_list[0].keys():
combined_image_metas[key] = [meta[key] for meta in metas_list if key in meta]
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {combined_image_metas}")
else:
combined_image_metas = {}
except Exception as e:
logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
combined_image_metas = {}
# --- 2. ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ---
print(f"๐Ÿ” [DEBUG] ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ์‹œ์ž‘")
# ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ (๋Œ€ํ™” ๊ธฐ๋ก + RAG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ํฌํ•จ) - ๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”
context_prompt = ""
if use_context and session_id:
try:
# 1. ๋Œ€ํ™” ๊ธฐ๋ก ์ปจํ…์ŠคํŠธ
context = context_manager.get_context_for_model(
current_profile.model_name,
session_id
)
if context and len(context.strip()) > 0:
context_prompt = context + "\n\n"
print(f"๐Ÿ” [DEBUG] ๋Œ€ํ™” ์ปจํ…์ŠคํŠธ ํฌํ•จ๋จ - ๊ธธ์ด: {len(context_prompt)} (์„ธ์…˜: {session_id})")
# 2. RAG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ปจํ…์ŠคํŠธ (PDF ๋‚ด์šฉ ํฌํ•จ)
try:
# ๏ฟฝ๏ฟฝ ์ƒˆ๋กœ์šด ๋ฉ”๋ชจ๋ฆฌ ์‹œ์Šคํ…œ์„ ์‚ฌ์šฉํ•œ RAG ์ปจํ…์ŠคํŠธ ๋กœ๋“œ
rag_context = ""
# ๐Ÿ”’ ์‚ฌ์šฉ์ž ์„ค์ • ํ™•์ธ
from lily_llm_core.user_memory_manager import user_memory_manager
keep_memory = user_memory_manager.get_memory_setting(user_id, "keep_memory_on_room_change")
if keep_memory:
# ๋ฉ”๋ชจ๋ฆฌ ์œ ์ง€ ๋ชจ๋“œ - ๊ธฐ์กด ๋กœ์ง ์‹คํ–‰
print(f"๐Ÿ”„ [DEBUG] ์‚ฌ์šฉ์ž {user_id} ๋ฉ”๋ชจ๋ฆฌ ์œ ์ง€ ๋ชจ๋“œ - RAG ์ปจํ…์ŠคํŠธ ๋กœ๋“œ")
# ํ†ตํ•ฉ ๋ฉ”๋ชจ๋ฆฌ ๊ด€๋ฆฌ์ž์—์„œ AI์šฉ ์ปจํ…์ŠคํŠธ ์ƒ์„ฑ
ai_context = integrated_memory_manager.get_context_for_ai(
user_id=user_id,
room_id=room_id,
session_id=session_id,
include_user_memory=True,
include_room_context=True,
include_session_history=False # ํ˜„์žฌ ๋Œ€ํ™”๋Š” ๋ณ„๋„๋กœ ์ฒ˜๋ฆฌ
)
if ai_context:
rag_context += f"\n\n๐Ÿ”— ๋ฉ”๋ชจ๋ฆฌ ์ปจํ…์ŠคํŠธ:\n{ai_context}\n"
print(f"๐Ÿ” [DEBUG] ๋ฉ”๋ชจ๋ฆฌ ์ปจํ…์ŠคํŠธ ํฌํ•จ๋จ - ๊ธธ์ด: {len(ai_context)}")
# ๊ธฐ์กด RAG ์‹œ์Šคํ…œ์—์„œ ๋ฌธ์„œ ๋‚ด์šฉ ๊ฐ€์ ธ์˜ค๊ธฐ (room_id ๊ธฐ๋ฐ˜)
try:
# ์ฑ„ํŒ…๋ฐฉ๋ณ„ ๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ์กฐํšŒ
room_context = integrated_memory_manager.room_context_manager.get_room_context(room_id)
if room_context and room_context.documents:
rag_context += "\n\n๐Ÿ“„ ์—…๋กœ๋“œ๋œ ๋ฌธ์„œ ๋ชฉ๋ก:\n"
for doc in room_context.documents[-3:]: # ์ตœ๊ทผ 3๊ฐœ๋งŒ
# ๋”•์…”๋„ˆ๋ฆฌ์™€ ๊ฐ์ฒด ๋ชจ๋‘ ์ฒ˜๋ฆฌ
if isinstance(doc, dict):
filename = doc.get('filename', 'unknown')
doc_type = doc.get('document_type', 'unknown')
page_count = doc.get('page_count', 0)
else:
filename = getattr(doc, 'filename', 'unknown')
doc_type = getattr(doc, 'document_type', 'unknown')
page_count = getattr(doc, 'page_count', 0)
rag_context += f" - {filename} ({doc_type}, {page_count}ํŽ˜์ด์ง€)\n"
print(f"๐Ÿ” [DEBUG] ์ฑ„ํŒ…๋ฐฉ {room_id}์˜ ๋ฌธ์„œ {len(room_context.documents)}๊ฐœ ๋ฐœ๊ฒฌ")
except Exception as e:
print(f"โš ๏ธ ์ฑ„ํŒ…๋ฐฉ ๋ฌธ์„œ ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e}")
# ๐Ÿ”’ ๋ฌธ์„œ ๋‚ด์šฉ ์ž์ฒด๋Š” ๋กœ๋“œํ•˜์ง€ ์•Š์Œ (ํ„ด๋ณ„ ์ดˆ๊ธฐํ™”)
# ์ด์ „ ํ„ด์—์„œ ์ฒจ๋ถ€๋œ ๋ฌธ์„œ์˜ ์‹ค์ œ ๋‚ด์šฉ์€ AI ์ปจํ…์ŠคํŠธ์— ํฌํ•จํ•˜์ง€ ์•Š์Œ
print(f"๏ฟฝ๏ฟฝ [DEBUG] ๋ฌธ์„œ ๋‚ด์šฉ ๋กœ๋“œ ๊ฑด๋„ˆ๋›ฐ๊ธฐ - ํ„ด๋ณ„ ์ดˆ๊ธฐํ™” ์ ์šฉ")
# ๏ฟฝ๏ฟฝ ํ˜„์žฌ ํ„ด์—์„œ๋งŒ ๋ฌธ์„œ ์ •๋ณด ํ‘œ์‹œ (์‹ค์ œ ๋‚ด์šฉ์€ ๋กœ๋“œํ•˜์ง€ ์•Š์Œ)
if rag_context:
context_prompt += rag_context
print(f"๐Ÿ” [DEBUG] ๋ฌธ์„œ ๋ชฉ๋ก๋งŒ ํ‘œ์‹œ - ์‹ค์ œ ๋‚ด์šฉ ๋กœ๋“œ ์•ˆํ•จ (ํ„ด๋ณ„ ์ดˆ๊ธฐํ™”)")
except Exception as e:
print(f"โš ๏ธ [DEBUG] RAG ์ปจํ…์ŠคํŠธ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
if not context_prompt:
print(f"๏ฟฝ๏ฟฝ [DEBUG] ์ปจํ…์ŠคํŠธ ์—†์Œ ๋˜๋Š” ๋น„์–ด์žˆ์Œ (์„ธ์…˜: {session_id})")
except Exception as e:
print(f"โš ๏ธ [DEBUG] ์ปจํ…์ŠคํŠธ ๋กœ๋“œ ์‹คํŒจ: {e} (์„ธ์…˜: {session_id})")
context_prompt = ""
# formatted_prompt ์ดˆ๊ธฐํ™”
formatted_prompt = None
# ๐Ÿ”„ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ๋ฐฉ์‹)
if all_pixel_values and len(all_pixel_values) > 0:
# ๐Ÿ”„ ๊ณต์‹ Kanana ํ˜•์‹: Human: <image> ํ…์ŠคํŠธ
# ์ด๋ฏธ์ง€ ํ† ํฐ์€ encode_prompt์—์„œ ์ž๋™์œผ๋กœ ์ฒ˜๋ฆฌ๋จ
formatted_prompt = f"Human: <image>{prompt}"
print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (๊ณต์‹ ํ˜•์‹): {formatted_prompt}")
image_processed = True
else:
image_tokens = ""
image_processed = False
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ์—†์Œ - ํ…์ŠคํŠธ-only ๋ชจ๋“œ")
# ํ…์ŠคํŠธ-only ๋ชจ๋ธ์šฉ ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ (์ปจํ…์ŠคํŠธ ํฌํ•จ)
if hasattr(current_profile, 'format_prompt'):
# Polyglot ๋ชจ๋ธ์ผ ๋•Œ๋Š” format_prompt ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ (์ปจํ…์ŠคํŠธ ์ง€์›)
if "polyglot" in current_profile.model_name.lower():
# ์ปจํ…์ŠคํŠธ์™€ ํ”„๋กฌํ”„ํŠธ๋ฅผ ํ•จ๊ป˜ ์ „๋‹ฌ
formatted_prompt = current_profile.format_prompt(prompt, context_prompt)
else:
# ๋‹ค๋ฅธ ๋ชจ๋ธ์€ ๊ธฐ์กด ๋ฐฉ์‹ ์‚ฌ์šฉ
base_prompt = current_profile.format_prompt(prompt)
if context_prompt:
formatted_prompt = context_prompt + base_prompt
else:
formatted_prompt = base_prompt
print(f"๐Ÿ” [DEBUG] ํ”„๋กœํ•„ format_prompt ์‚ฌ์šฉ (์ปจํ…์ŠคํŠธ ํฌํ•จ): {formatted_prompt}")
else:
# ๊ธฐ๋ณธ ํ”„๋กฌํ”„ํŠธ (fallback) - ์ปจํ…์ŠคํŠธ ํฌํ•จ
# Polyglot ๋ชจ๋ธ์€ <|im_start|> ํƒœ๊ทธ๋ฅผ ์ œ๋Œ€๋กœ ์ฒ˜๋ฆฌํ•˜์ง€ ๋ชปํ•จ
if "polyglot" in current_profile.model_name.lower():
base_prompt = f"### ์‚ฌ์šฉ์ž:\n{prompt}\n\n### ์ฑ—๋ด‡:\n"
else:
base_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
if context_prompt:
formatted_prompt = context_prompt + base_prompt
else:
formatted_prompt = base_prompt
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ํ”„๋กฌํ”„ํŠธ ์‚ฌ์šฉ (์ปจํ…์ŠคํŠธ ํฌํ•จ): {formatted_prompt}")
print(f"๐Ÿ” [DEBUG] ํ”„๋กฌํ”„ํŠธ ๊ตฌ์„ฑ ์™„๋ฃŒ - ๊ธธ์ด: {len(formatted_prompt) if formatted_prompt else 0}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
# --- 3. ํ† ํฌ๋‚˜์ด์ง• ---
print(f"๐Ÿ” [DEBUG] ํ† ํฌ๋‚˜์ด์ง• ์‹œ์ž‘")
t_tok_start = time.time()
if not all_image_data or len([img for img in all_image_data if img]) == 0:
# ํ…์ŠคํŠธ-only ๊ณ ์ • ๊ฒฝ๋กœ (๋” ๋น ๋ฆ„)
print(f"๐Ÿ” [DEBUG] ํ…์ŠคํŠธ-only ํ† ํฌ๋‚˜์ด์ง• ๊ฒฝ๋กœ")
print(f"๐Ÿ” [DEBUG] ์‚ฌ์šฉํ•  ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
inputs = tokenizer(
formatted_prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_length=2048,
)
if 'token_type_ids' in inputs:
del inputs['token_type_ids']
print(f"๐Ÿ” [DEBUG] token_type_ids ์ œ๊ฑฐ๋จ")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
print(f"๐Ÿ” [DEBUG] ํ† ํฌ๋‚˜์ด์ € ์ถœ๋ ฅ: {list(inputs.keys())}")
else:
# ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ(Lite): Kanana ์ „์šฉ encode_prompt๋กœ -1 ํ† ํฐ ์ž๋ฆฌ ์ƒ์„ฑ (ํ•„์ˆ˜)
print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ํ† ํฌ๋‚˜์ด์ง• ๊ฒฝ๋กœ")
print(f"๐Ÿ” [DEBUG] combined_image_metas: {combined_image_metas}")
print(f"๐Ÿ” [DEBUG] ์ด ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜: {len(all_image_data)}")
if hasattr(tokenizer, 'encode_prompt'):
print(f"๐Ÿ” [DEBUG] encode_prompt ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ")
# ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๊ฒ€์ฆ ๋ฐ ์•ˆ์ „ํ™”
safe_image_meta = {}
if combined_image_metas:
# image_token_thw ๋ฐฐ์—ด ๊ธธ์ด ๊ฒ€์ฆ
if 'image_token_thw' in combined_image_metas:
image_token_thw = combined_image_metas['image_token_thw']
if isinstance(image_token_thw, list) and len(image_token_thw) > 0:
# ๋ฐฐ์—ด ๊ธธ์ด๊ฐ€ ์ด๋ฏธ์ง€ ๊ฐœ์ˆ˜์™€ ์ผ์น˜ํ•˜๋Š”์ง€ ํ™•์ธ
if len(image_token_thw) == len(all_pixel_values):
# ๐Ÿ”„ ์ถ”๊ฐ€ ๊ฒ€์ฆ: ๊ฐ ๋ฐฐ์—ด ์š”์†Œ๊ฐ€ ์œ ํšจํ•œ์ง€ ํ™•์ธ
valid_meta = True
for i, thw in enumerate(image_token_thw):
if not isinstance(thw, (list, tuple)) or len(thw) != 3:
print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์š”์†Œ {i}๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Œ: {thw}")
valid_meta = False
break
if valid_meta:
safe_image_meta = combined_image_metas
print(f"๐Ÿ” [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๊ฒ€์ฆ ํ†ต๊ณผ: {len(image_token_thw)}๊ฐœ ์ด๋ฏธ์ง€")
else:
print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์š”์†Œ ๊ฒ€์ฆ ์‹คํŒจ, ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ")
safe_image_meta = {
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
}
else:
print(f"โš ๏ธ [DEBUG] ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋ถˆ์ผ์น˜: ์ด๋ฏธ์ง€ {len(all_pixel_values)}๊ฐœ, ๋ฉ”ํƒ€ {len(image_token_thw)}๊ฐœ")
# ์•ˆ์ „ํ•œ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
safe_image_meta = {
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
}
else:
print(f"โš ๏ธ [DEBUG] image_token_thw๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š์Œ, ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ")
safe_image_meta = {
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
}
else:
print(f"โš ๏ธ [DEBUG] image_token_thw ์—†์Œ, ๊ธฐ๋ณธ๊ฐ’ ์ƒ์„ฑ")
safe_image_meta = {
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
}
else:
print(f"โš ๏ธ [DEBUG] combined_image_metas ์—†์Œ, ๊ธฐ๋ณธ๊ฐ’ ์ƒ์„ฑ")
safe_image_meta = {
'image_token_thw': [[1, 1, 1]] * len(all_pixel_values),
'vision_grid_thw': [[1, 1, 1]] * len(all_pixel_values)
}
print(f"๐Ÿ” [DEBUG] ์•ˆ์ „ํ™”๋œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {safe_image_meta}")
# ๐Ÿ”„ ์•ˆ์ „ํ•œ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋กœ encode_prompt ํ˜ธ์ถœ
try:
# ๐Ÿ”„ ์ถ”๊ฐ€ ์•ˆ์ „์žฅ์น˜: ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ๋ณต์‚ฌ๋ณธ ์ƒ์„ฑ
final_meta = {}
for key, value in safe_image_meta.items():
if isinstance(value, list):
final_meta[key] = value.copy() # ๋ณต์‚ฌ๋ณธ ์ƒ์„ฑ
else:
final_meta[key] = value
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ: {final_meta}")
# ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: max_length ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€
inputs = tokenizer.encode_prompt(
prompt=formatted_prompt,
max_length=2048, # ๊ณต์‹ ์ฝ”๋“œ์™€ ๋™์ผ
image_meta=final_meta
)
print(f"๐Ÿ” [DEBUG] encode_prompt ์ถœ๋ ฅ: {list(inputs.keys())}")
# ๐Ÿ”„ encode_prompt ์ถœ๋ ฅ ์ •๊ทœํ™” (seq_length ์ œ๊ฑฐ)
if 'seq_length' in inputs:
print(f"๐Ÿ” [DEBUG] seq_length ์ œ๊ฑฐ๋จ")
del inputs['seq_length']
# ๐Ÿ”„ input_ids ์•ˆ์ „ํ•˜๊ฒŒ ์ถ”์ถœ (๊ณต์‹ ๋ฐฉ์‹)
if isinstance(inputs['input_ids'], tuple):
print(f"๐Ÿ” [DEBUG] input_ids๊ฐ€ ํŠœํ”Œ์ž„: {len(inputs['input_ids'])}๊ฐœ ์š”์†Œ")
input_ids = inputs['input_ids'][0] # ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
print(f"๐Ÿ” [DEBUG] input_ids ํŠœํ”Œ์—์„œ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์ถ”์ถœ: {input_ids.shape}")
else:
input_ids = inputs['input_ids']
# ๐Ÿ”„ attention_mask๋„ ์•ˆ์ „ํ•˜๊ฒŒ ์ถ”์ถœ
if isinstance(inputs['attention_mask'], tuple):
print(f"๐Ÿ” [DEBUG] attention_mask๊ฐ€ ํŠœํ”Œ์ž„: {len(inputs['attention_mask'])}๊ฐœ ์š”์†Œ")
attention_mask = inputs['attention_mask'][0] # ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
print(f"๐Ÿ” [DEBUG] attention_mask ํŠœํ”Œ์—์„œ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์ถ”์ถœ: {attention_mask.shape}")
else:
attention_mask = inputs['attention_mask']
# ๐Ÿ”„ ์ตœ์ข… ๊ฒ€์ฆ
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids ํƒ€์ž…: {type(input_ids)}, shape: {input_ids.shape}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… attention_mask ํƒ€์ž…: {type(attention_mask)}, shape: {attention_mask.shape}")
except Exception as e:
print(f"โŒ [DEBUG] encode_prompt ์‹คํŒจ: {e}, ํด๋ฐฑ ์‚ฌ์šฉ")
# ํด๋ฐฑ: ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ
inputs = tokenizer(
formatted_prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_length=2048,
)
if 'token_type_ids' in inputs:
del inputs['token_type_ids']
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
else:
# ์•ˆ์ „ ํด๋ฐฑ
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ (ํด๋ฐฑ)")
inputs = tokenizer(
formatted_prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_lengt=2048,
)
if 'token_type_ids' in inputs:
del inputs['token_type_ids']
print(f"๐Ÿ” [DEBUG] token_type_ids ์ œ๊ฑฐ๋จ (ํด๋ฐฑ)")
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ํ† ํฌ๋‚˜์ด์ € ์ถœ๋ ฅ: {list(inputs.keys())}")
t_tok_end = time.time()
print(f"๐Ÿ” [DEBUG] ํ† ํฌ๋‚˜์ด์ง• ์™„๋ฃŒ - ์†Œ์š”์‹œ๊ฐ„: {t_tok_end - t_tok_start:.3f}์ดˆ")
# ๐Ÿ”„ input_ids ์•ˆ์ „ํ•˜๊ฒŒ ์ฒ˜๋ฆฌ
if isinstance(input_ids, tuple):
print(f"๐Ÿ” [DEBUG] input_ids๊ฐ€ ํŠœํ”Œ์ž„: {len(input_ids)}๊ฐœ ์š”์†Œ")
input_ids = input_ids[0] # ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
print(f"๐Ÿ” [DEBUG] input_ids ํŠœํ”Œ์—์„œ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์ถ”์ถœ: {input_ids.shape}")
# ๐Ÿ”„ 1์ฐจ์› ํ…์„œ๋ฅผ 2์ฐจ์›์œผ๋กœ reshape
if len(input_ids.shape) == 1:
print(f"๐Ÿ” [DEBUG] 1์ฐจ์› ํ…์„œ๋ฅผ 2์ฐจ์›์œผ๋กœ reshape: {input_ids.shape} -> (1, {input_ids.shape[0]})")
input_ids = input_ids.unsqueeze(0) # (seq_len,) -> (1, seq_len)
# ๐Ÿ”„ attention_mask๋„ ๋™์ผํ•˜๊ฒŒ ์ฒ˜๋ฆฌ
if len(attention_mask.shape) == 1:
print(f"๐Ÿ” [DEBUG] attention_mask 1์ฐจ์›์„ 2์ฐจ์›์œผ๋กœ reshape: {attention_mask.shape} -> (1, {attention_mask.shape[0]})")
attention_mask = attention_mask.unsqueeze(0) # (seq_len,) -> (1, seq_len)
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… input_ids shape: {input_ids.shape}")
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ† ํฐ ์ˆ˜: {input_ids.shape[1]}")
# --- 4. ์ƒ์„ฑ ์„ค์ • ---
print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ ์„ค์ • ๊ตฌ์„ฑ ์‹œ์ž‘")
gen_config = current_profile.get_generation_config()
# config ํŒŒ์ผ์— ๋ช…์‹œ๋œ eos, pad, bos ํ† ํฐ id ๊ธฐ๋ณธ๊ฐ’์œผ๋กœ ์ฑ„์šฐ๊ธฐ
if 'eos_token_id' not in gen_config or gen_config['eos_token_id'] is None:
gen_config['eos_token_id'] = tokenizer.eos_token_id
if 'pad_token_id' not in gen_config or gen_config['pad_token_id'] is None:
gen_config['pad_token_id'] = tokenizer.pad_token_id or tokenizer.eos_token_id
# ํ•„์š”ํ•  ๊ฒฝ์šฐ bos_token_id ๋„ ์„ค์ • (generate ํ•จ์ˆ˜์— ๋”ฐ๋ผ ๋‹ค๋ฆ„)
if 'bos_token_id' not in gen_config and hasattr(tokenizer, 'bos_token_id'):
gen_config['bos_token_id'] = tokenizer.bos_token_id
# max_new_tokens, temperature ๋“ฑ API ์ธ์ž ๋ฐ›์•„์„œ ๋ฎ์–ด์“ฐ๊ธฐ
if max_length is not None:
gen_config['max_new_tokens'] = max_length
if temperature is not None:
gen_config['temperature'] = temperature
if top_p is not None:
gen_config['top_p'] = top_p
if do_sample is not None:
gen_config['do_sample'] = do_sample
print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ ์„ค์ •: {gen_config}")
# --- 5. ์‹ค์ œ ์ถ”๋ก  ์‹คํ–‰ ---
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ถ”๋ก  ์‹œ์ž‘")
t_gen_start = time.time()
try:
# ๋ชจ๋ธ ์ƒํƒœ ํ™•์ธ
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค: {model.device}")
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ…์„œ ๋””๋ฐ”์ด์Šค: {input_ids.device}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ํƒ€์ž…: {type(model)}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒํƒœ: {'eval' if model.training == False else 'training'}")
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ…์„œ shape: {input_ids.shape}")
print(f"๐Ÿ” [DEBUG] attention_mask shape: {attention_mask.shape}")
print(f"๐Ÿ” [DEBUG] all_pixel_values ์กด์žฌ ์—ฌ๋ถ€: {all_pixel_values is not None}")
print(f"๐Ÿ” [DEBUG] all_pixel_values ๊ธธ์ด: {len(all_pixel_values) if all_pixel_values else 0}")
# ์ž…๋ ฅ ํ…์„œ๋ฅผ ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™
if input_ids.device != model.device:
print(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ ํ…์„œ๋ฅผ ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™: {input_ids.device} -> {model.device}")
input_ids = input_ids.to(model.device)
attention_mask = attention_mask.to(model.device)
# ๐Ÿ”„ torch import ๋ฌธ์ œ ํ•ด๊ฒฐ
import torch
with torch.no_grad():
if all_pixel_values and len(all_pixel_values) > 0:
# ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ: ์ด๋ฏธ์ง€์™€ ํ…์ŠคํŠธ ํ•จ๊ป˜ ์ฒ˜๋ฆฌ
print(f"๐Ÿ” [DEBUG] ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ถ”๋ก  ์‹คํ–‰")
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ ๊ฐœ์ˆ˜: {len(all_pixel_values)}")
# ์ด๋ฏธ์ง€ ํ…์„œ๋„ ๋””๋ฐ”์ด์Šค ํ™•์ธ
pixel_values = torch.cat(all_pixel_values, dim=0)
print(f"๐Ÿ” [DEBUG] ๊ฒฐํ•ฉ๋œ ์ด๋ฏธ์ง€ ํ…์„œ shape: {pixel_values.shape}")
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
# ๐Ÿ”„ ๋ชจ๋ธ๊ณผ ๋™์ผํ•œ dtype์œผ๋กœ ๋ณ€ํ™˜ (์„ฑ๋Šฅ ์ตœ์ ํ™”)
if hasattr(model, 'dtype'):
target_dtype = model.dtype
if pixel_values.dtype != target_dtype:
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ dtype ๋ณ€ํ™˜: {pixel_values.dtype} -> {target_dtype}")
pixel_values = pixel_values.to(dtype=target_dtype)
else:
# ๐Ÿ”„ ๋ชจ๋ธ dtype์„ ์•Œ ์ˆ˜ ์—†๋Š” ๊ฒฝ์šฐ bfloat16 ์‚ฌ์šฉ (Kanana ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ’)
target_dtype = torch.bfloat16
if pixel_values.dtype != target_dtype:
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ dtype ๋ณ€ํ™˜: {pixel_values.dtype} -> {target_dtype}")
pixel_values = pixel_values.to(dtype=target_dtype)
if pixel_values.device != model.device:
print(f"๐Ÿ” [DEBUG] ์ด๋ฏธ์ง€ ํ…์„œ๋ฅผ ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค๋กœ ์ด๋™: {pixel_values.device} -> {model.device}")
pixel_values = pixel_values.to(model.device)
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ด๋ฏธ์ง€ ํ…์„œ ๋””๋ฐ”์ด์Šค: {pixel_values.device}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ด๋ฏธ์ง€ ํ…์„œ dtype: {pixel_values.dtype}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘ - ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ")
# LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ์ธ์ง€ ํ™•์ธ
if LORA_AVAILABLE and lora_manager and hasattr(lora_manager, 'current_adapter_name') and lora_manager.current_adapter_name:
print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ๋จ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ): {lora_manager.current_adapter_name}")
# LoRA๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ ์‚ฌ์šฉ
lora_model = lora_manager.get_model()
if lora_model:
print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ๋กœ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํ–‰")
# ๐Ÿ”„ image_metas ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€ (๊ณต์‹ ๋ฐฉ์‹)
# ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋ฅผ ๊ณต์‹ ๊ตฌ์กฐ๋กœ ๋ณ€ํ™˜ (๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ)
import torch
processed_image_metas = {}
# ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: vision_grid_thw๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜
if 'vision_grid_thw' in combined_image_metas:
vision_grid = combined_image_metas['vision_grid_thw']
if isinstance(vision_grid, list):
# ๐Ÿ”„ Kanana ๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ: (T, H, W) ํ˜•ํƒœ์˜ 3์ฐจ์› ํ…์„œ
if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
# [(1, 34, 52)] -> (1, 34, 52) ํ…์„œ๋กœ ๋ณ€ํ™˜
t, h, w = vision_grid[0]
# ๐Ÿ”„ 3์ฐจ์› ํ…์„œ๋กœ ๋ณ€ํ™˜: (1, H, W) ํ˜•ํƒœ
processed_image_metas['vision_grid_thw'] = torch.tensor([[t, h, w]], dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
# ๐Ÿ”„ ๋‹ค๋ฅธ ํ˜•ํƒœ์˜ ๊ฒฝ์šฐ ์›๋ณธ ์œ ์ง€
processed_image_metas['vision_grid_thw'] = torch.tensor(vision_grid, dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜ (๊ธฐ๋ณธ): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
processed_image_metas['vision_grid_thw'] = vision_grid
# ๐Ÿ”„ ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ๊ทธ๋Œ€๋กœ ์œ ์ง€
for key, value in combined_image_metas.items():
if key != 'vision_grid_thw':
processed_image_metas[key] = value
generate_kwargs = {
'input_ids': input_ids,
'attention_mask': attention_mask,
'pixel_values': pixel_values,
'image_metas': processed_image_metas, # ๐Ÿ”„ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
**gen_config
}
print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘... (ํƒ€์ž„์•„์›ƒ ์—†์Œ)")
# ๐Ÿ”„ ์ƒ์„ฑ ์ „ ์ตœ์ข… ๊ฒ€์ฆ
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ํŒŒ๋ผ๋ฏธํ„ฐ ๊ฒ€์ฆ:")
print(f" - input_ids: {input_ids.shape}, dtype: {input_ids.dtype}")
print(f" - attention_mask: {attention_mask.shape}, dtype: {attention_mask.dtype}")
print(f" - pixel_values: {pixel_values.shape}, dtype: {pixel_values.dtype}")
print(f" - vision_grid_thw: {processed_image_metas.get('vision_grid_thw', 'None')}")
generated_ids = lora_model.generate(**generate_kwargs)
else:
print(f"โš ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Œ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
# ๐Ÿ”„ image_metas ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€ (๊ณต์‹ ๋ฐฉ์‹)
# ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋ฅผ ๊ณต์‹ ๊ตฌ์กฐ๋กœ ๋ณ€ํ™˜ (๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ)
processed_image_metas = {}
# ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: vision_grid_thw๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜
if 'vision_grid_thw' in combined_image_metas:
vision_grid = combined_image_metas['vision_grid_thw']
if isinstance(vision_grid, list):
# ๐Ÿ”„ Kanana ๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ: (T, H, W) ํ˜•ํƒœ์˜ 3์ฐจ์› ํ…์„œ
if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
# [(1, 34, 52)] -> (1, 34, 52) ํ…์„œ๋กœ ๋ณ€ํ™˜
t, h, w = vision_grid[0]
# ๐Ÿ”„ 3์ฐจ์› ํ…์„œ๋กœ ๋ณ€ํ™˜: (1, H, W) ํ˜•ํƒœ
processed_image_metas['vision_grid_thw'] = torch.tensor([[t, h, w]], dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
# ๐Ÿ”„ ๋‹ค๋ฅธ ํ˜•ํƒœ์˜ ๊ฒฝ์šฐ ์›๋ณธ ์œ ์ง€
processed_image_metas['vision_grid_thw'] = torch.tensor(vision_grid, dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜ (๊ธฐ๋ณธ): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
processed_image_metas['vision_grid_thw'] = vision_grid
# ๐Ÿ”„ ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ๊ทธ๋Œ€๋กœ ์œ ์ง€
for key, value in combined_image_metas.items():
if key != 'vision_grid_thw':
processed_image_metas[key] = value
generate_kwargs = {
'input_ids': input_ids,
'attention_mask': attention_mask,
'pixel_values': pixel_values,
'image_metas': processed_image_metas, # ๐Ÿ”„ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
**gen_config
}
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
generated_ids = model.generate(**generate_kwargs)
else:
print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์—†์Œ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ), ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
# ๐Ÿ”„ image_metas ํŒŒ๋ผ๋ฏธํ„ฐ ์ถ”๊ฐ€ (๊ณต์‹ ๋ฐฉ์‹)
# ๐Ÿ”„ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋ฅผ ๊ณต์‹ ๊ตฌ์กฐ๋กœ ๋ณ€ํ™˜ (๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ)
processed_image_metas = {}
# ๐Ÿ”„ ๊ณต์‹ ๋ฐฉ์‹: vision_grid_thw๋ฅผ ํ…์„œ๋กœ ๋ณ€ํ™˜
if 'vision_grid_thw' in combined_image_metas:
vision_grid = combined_image_metas['vision_grid_thw']
if isinstance(vision_grid, list):
# ๐Ÿ”„ Kanana ๋ชจ๋ธ ์š”๊ตฌ์‚ฌํ•ญ: (T, H, W) ํ˜•ํƒœ์˜ 3์ฐจ์› ํ…์„œ
if len(vision_grid) == 1 and len(vision_grid[0]) == 3:
# [(1, 34, 52)] -> (1, 34, 52) ํ…์„œ๋กœ ๋ณ€ํ™˜
t, h, w = vision_grid[0]
# ๐Ÿ”„ 3์ฐจ์› ํ…์„œ๋กœ ๋ณ€ํ™˜: (1, H, W) ํ˜•ํƒœ
processed_image_metas['vision_grid_thw'] = torch.tensor([[t, h, w]], dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜: {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
# ๐Ÿ”„ ๋‹ค๋ฅธ ํ˜•ํƒœ์˜ ๊ฒฝ์šฐ ์›๋ณธ ์œ ์ง€
processed_image_metas['vision_grid_thw'] = torch.tensor(vision_grid, dtype=torch.long)
print(f"๐Ÿ” [DEBUG] vision_grid_thw ํ…์„œ ๋ณ€ํ™˜ (๊ธฐ๋ณธ): {vision_grid} -> {processed_image_metas['vision_grid_thw'].shape}")
else:
processed_image_metas['vision_grid_thw'] = vision_grid
# ๐Ÿ”„ ๋‹ค๋ฅธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ๋Š” ๊ทธ๋Œ€๋กœ ์œ ์ง€
for key, value in combined_image_metas.items():
if key != 'vision_grid_thw':
processed_image_metas[key] = value
generate_kwargs = {
'input_ids': input_ids,
'attention_mask': attention_mask,
'pixel_values': pixel_values,
'image_metas': processed_image_metas, # ๐Ÿ”„ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ
**gen_config
}
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ๋ชจ๋ธ ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ: {list(generate_kwargs.keys())}")
print(f"๐Ÿ” [DEBUG] ์ฒ˜๋ฆฌ๋œ image_metas: {list(processed_image_metas.keys())}")
generated_ids = model.generate(**generate_kwargs)
# ํ† ํฐ ์„ค์ •์„ ๋ช…์‹œ์ ์œผ๋กœ ์ „๋‹ฌํ•˜์—ฌ EOS ํ† ํฐ ๋ฌธ์ œ ํ•ด๊ฒฐ
# generate_kwargs = {
# 'input_ids': input_ids.to(model.device),
# 'attention_mask': attention_mask.to(model.device),
# 'pixel_values': pixel_values.to(model.device),
# 'max_new_tokens': gen_config['max_new_tokens'],
# 'temperature': gen_config['temperature'],
# 'top_p': gen_config['top_p'],
# 'do_sample': gen_config['do_sample'],
# 'repetition_penalty': gen_config.get('repetition_penalty', 1.0),
# 'no_repeat_ngram_size': gen_config.get('no_repeat_ngram_size', 0),
# # 'num_beams': gen_config.get('num_beams', 1),
# 'use_cache': gen_config.get('use_cache', True),
# 'max_time': gen_config.get('max_time', None),
# 'early_stopping': gen_config.get('early_stopping', False),
# 'stopping_criteria': gen_config.get('stopping_criteria', None),
# }
#
# # ํ† ํฐ ID ์„ค์ • (์ค‘์š”!)
# if gen_config.get('eos_token_id') is not None:
# generate_kwargs['eos_token_id'] = gen_config['eos_token_id']
# if gen_config.get('pad_token_id') is not None:
# generate_kwargs['pad_token_id'] = gen_config['pad_token_id']
# if gen_config.get('bos_token_id') is not None:
# generate_kwargs['bos_token_id'] = gen_config['bos_token_id']
#
# print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ƒ์„ฑ ์„ค์ •: {generate_kwargs}")
#
# generated_ids = model.generate(**generate_kwargs)
else:
# ํ…์ŠคํŠธ-only: ๊ธฐ์กด ๋ฐฉ์‹
print(f"๐Ÿ” [DEBUG] ํ…์ŠคํŠธ-only ์ถ”๋ก  ์‹คํ–‰")
print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ ์„ค์ •: {gen_config}")
# ํƒ€์ž„์•„์›ƒ ์„ค์ •์„ ์œ„ํ•œ ์ถ”๊ฐ€ ์„ค์ • (๋” ์ ์ ˆํ•œ ๊ฐ’์œผ๋กœ ์กฐ์ •)
# if 'max_time' not in gen_config:
# gen_config['max_time'] = 60.0 # 60์ดˆ ํƒ€์ž„์•„์›ƒ์œผ๋กœ ์กฐ์ •
# ์ถ”๊ฐ€ ํƒ€์ž„์•„์›ƒ ์„ค์ •
# gen_config['max_time'] = 60.0 # ๊ฐ•์ œ 60์ดˆ ํƒ€์ž„์•„์›ƒ
# print(f"๐Ÿ” [DEBUG] ๊ฐ•์ œ ํƒ€์ž„์•„์›ƒ ์„ค์ •: {gen_config['max_time']}์ดˆ")
# ์ถ”๊ฐ€ ์„ฑ๋Šฅ ์ตœ์ ํ™” ์„ค์ •
gen_config['use_cache'] = True # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
# PAD ํ† ํฐ ์„ค์ • - ๋ชจ๋ธ ํ”„๋กœํ•„ ์„ค์ • ์šฐ์„ 
if 'pad_token_id' not in gen_config:
# ํ”„๋กœํ•„์— ์„ค์ •์ด ์—†์„ ๋•Œ๋งŒ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
if tokenizer.pad_token_id is not None:
gen_config['pad_token_id'] = tokenizer.pad_token_id
print(f"๐Ÿ” [DEBUG] PAD ํ† ํฐ ์„ค์ •: ํ† ํฌ๋‚˜์ด์ € ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ (ID: {tokenizer.pad_token_id})")
else:
gen_config['pad_token_id'] = None
print(f"๐Ÿ” [DEBUG] PAD ํ† ํฐ ์„ค์ •: None (ํ† ํฌ๋‚˜์ด์ €์— PAD ํ† ํฐ ์—†์Œ)")
# ํ† ํฐ ์„ค์ • - ํ”„๋กœํ•„์—์„œ ์„ค์ •๋œ ๊ฐ’ ์šฐ์„  ์‚ฌ์šฉ
if 'eos_token_id' not in gen_config or gen_config['eos_token_id'] is None:
if tokenizer.eos_token_id is not None:
gen_config['eos_token_id'] = tokenizer.eos_token_id
print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์„ค์ •: {tokenizer.eos_token_id}")
else:
gen_config['eos_token_id'] = None
print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์„ค์ •: None (์ž๋™ ์ฒ˜๋ฆฌ)")
if 'pad_token_id' not in gen_config or gen_config['pad_token_id'] is None:
if tokenizer.pad_token_id is not None:
gen_config['pad_token_id'] = tokenizer.pad_token_id
else:
gen_config['pad_token_id'] = None
if 'bos_token_id' not in gen_config or gen_config['bos_token_id'] is None:
if hasattr(tokenizer, 'bos_token_id') and tokenizer.bos_token_id is not None:
gen_config['bos_token_id'] = tokenizer.bos_token_id
else:
gen_config['bos_token_id'] = None
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ํ† ํฐ ์„ค์ •: EOS={gen_config['eos_token_id']}, PAD={gen_config['pad_token_id']}, BOS={gen_config.get('bos_token_id')}")
# ์ƒ์„ฑ ์„ค์ • ์ตœ์ข… ํ™•์ธ
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ƒ์„ฑ ์„ค์ •: {gen_config}")
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘ - ํ…์ŠคํŠธ๋งŒ")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ž…๋ ฅ ํ…์„œ ๋””๋ฐ”์ด์Šค: {input_ids.device}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… attention_mask ๋””๋ฐ”์ด์Šค: {attention_mask.device}")
# ๋ชจ๋ธ ์ƒ์„ฑ ์ง„ํ–‰ ์ƒํ™ฉ ๋ชจ๋‹ˆํ„ฐ๋ง์„ ์œ„ํ•œ ์ฝœ๋ฐฑ ์ถ”๊ฐ€
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์‹œ์ž‘ ์‹œ๊ฐ„: {time.time()}")
# LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ์ธ์ง€ ํ™•์ธ
if LORA_AVAILABLE and lora_manager and hasattr(lora_manager, 'current_adapter_name') and lora_manager.current_adapter_name:
print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ๋จ: {lora_manager.current_adapter_name}")
# LoRA๊ฐ€ ์ ์šฉ๋œ ๋ชจ๋ธ ์‚ฌ์šฉ
lora_model = lora_manager.get_model()
if lora_model:
print(f"๐Ÿ” [DEBUG] LoRA ๋ชจ๋ธ๋กœ ์ƒ์„ฑ ์‹คํ–‰")
# LoRA ๋ชจ๋ธ์šฉ ์ž…๋ ฅ ์ฒ˜๋ฆฌ (token_type_ids ์ œ๊ฑฐ)
lora_inputs = {
'input_ids': input_ids,
'attention_mask': attention_mask
}
# token_type_ids๊ฐ€ ์žˆ๋‹ค๋ฉด ์ œ๊ฑฐ
# if 'token_type_ids' in locals() and token_type_ids is not None:
# print(f"๐Ÿ” [DEBUG] token_type_ids ์ œ๊ฑฐ๋จ (LoRA ๋ชจ๋ธ ํ˜ธํ™˜์„ฑ)")
generated_ids = lora_model.generate(
**lora_inputs,
**gen_config
)
else:
print(f"โš ๏ธ [DEBUG] LoRA ๋ชจ๋ธ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Œ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
generated_ids = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
**gen_config
)
else:
print(f"๐Ÿ” [DEBUG] LoRA ์–ด๋Œ‘ํ„ฐ ์—†์Œ, ๊ธฐ๋ณธ ๋ชจ๋ธ ์‚ฌ์šฉ")
# LoRA ์ƒํƒœ ๋””๋ฒ„๊น…
if LORA_AVAILABLE:
if lora_manager:
print(f"๐Ÿ” [DEBUG] LoRA ๋งค๋‹ˆ์ € ์กด์žฌ: {type(lora_manager)}")
if hasattr(lora_manager, 'current_adapter_name'):
print(f"๐Ÿ” [DEBUG] ํ˜„์žฌ ์–ด๋Œ‘ํ„ฐ: {lora_manager.current_adapter_name}")
if hasattr(lora_manager, 'base_model'):
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ๋จ: {lora_manager.base_model is not None}")
else:
print(f"๐Ÿ” [DEBUG] LoRA ๋งค๋‹ˆ์ €๊ฐ€ None")
else:
print(f"๐Ÿ” [DEBUG] LoRA ์ง€์› ์•ˆ๋จ")
generated_ids = model.generate(
input_ids=input_ids,
attention_mask=attention_mask,
**gen_config
)
# ํ† ํฐ ์„ค์ •์„ ๋ช…์‹œ์ ์œผ๋กœ ์ „๋‹ฌํ•˜์—ฌ EOS ํ† ํฐ ๋ฌธ์ œ ํ•ด๊ฒฐ
# generate_kwargs = {
# 'input_ids': input_ids.to(model.device),
# 'attention_mask': attention_mask.to(model.device),
# 'max_new_tokens': gen_config['max_new_tokens'],
# 'temperature': gen_config['temperature'],
# 'top_p': gen_config['top_p'],
# 'do_sample': gen_config['do_sample'],
# 'repetition_penalty': gen_config.get('repetition_penalty', 1.0),
# 'no_repeat_ngram_size': gen_config.get('no_repeat_ngram_size', 0),
# # 'num_beams': gen_config.get('num_beams', 1),
# 'use_cache': gen_config.get('use_cache', True),
# 'max_time': gen_config.get('max_time', None),
# 'early_stopping': gen_config.get('early_stopping', False),
# 'stopping_criteria': gen_config.get('stopping_criteria', None),
# }
#
# # ํ† ํฐ ID ์„ค์ • (์ค‘์š”!)
# if gen_config.get('eos_token_id') is not None:
# generate_kwargs['eos_token_id'] = gen_config['eos_token_id']
# if gen_config.get('pad_token_id') is not None:
# generate_kwargs['pad_token_id'] = gen_config['pad_token_id']
# if gen_config.get('bos_token_id') is not None:
# generate_kwargs['bos_token_id'] = gen_config['bos_token_id']
# print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ƒ์„ฑ ์„ค์ •: {generate_kwargs}")
# generated_ids = model.generate(**generate_kwargs)
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ƒ์„ฑ ์™„๋ฃŒ ์‹œ๊ฐ„: {time.time()}")
t_gen_end = time.time()
print(f"๐Ÿ” [DEBUG] ๋ชจ๋ธ ์ถ”๋ก  ์™„๋ฃŒ - ์†Œ์š”์‹œ๊ฐ„: {t_gen_end - t_gen_start:.3f}์ดˆ")
print(f"๐Ÿ” [DEBUG] ์ƒ์„ฑ๋œ ํ† ํฐ ์ˆ˜: {generated_ids.shape[1] - input_ids.shape[1]}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… generated_ids shape: {generated_ids.shape}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… generated_ids ๋””๋ฐ”์ด์Šค: {generated_ids.device}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… generated_ids dtype: {generated_ids.dtype}")
except Exception as e:
print(f"โŒ [DEBUG] ๋ชจ๋ธ ์ถ”๋ก  ์ค‘ ์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
print(f"โŒ [DEBUG] ์—๋Ÿฌ ํƒ€์ž…: {type(e).__name__}")
print(f"โŒ [DEBUG] ์—๋Ÿฌ ์ƒ์„ธ: {str(e)}")
import traceback
traceback.print_exc()
return {"error": f"Generation failed: {str(e)}"}
# --- 6. ์‘๋‹ต ์ถ”์ถœ ---
print(f"๐Ÿ” [DEBUG] ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘")
t_decode_start = time.time()
try:
# ์ƒ์„ฑ๋œ ํ…์ŠคํŠธ ๋””์ฝ”๋”ฉ
full_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(f"๐Ÿ” [DEBUG] ์ „์ฒด ํ…์ŠคํŠธ ๊ธธ์ด: {len(full_text)}")
print(f"๐Ÿ” [DEBUG] ์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
print(f"๐Ÿ” [DEBUG] ์‚ฌ์šฉ๋œ ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
# ํ”„๋กœํ•„๋ณ„ ์‘๋‹ต ์ถ”์ถœ (์•ˆ์ „ํ•œ ๋ฐฉ์‹)
if hasattr(current_profile, 'extract_response'):
try:
response = current_profile.extract_response(full_text, formatted_prompt)
print(f"๐Ÿ” [DEBUG] ํ”„๋กœํ•„ extract_response ์‚ฌ์šฉ ์„ฑ๊ณต")
except Exception as extract_error:
print(f"โš ๏ธ [DEBUG] ํ”„๋กœํ•„ extract_response ์‹คํŒจ: {extract_error}")
# ํด๋ฐฑ: ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”์ถœ
response = full_text.replace(formatted_prompt, "").strip() if formatted_prompt else full_text
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”์ถœ ์‚ฌ์šฉ (ํด๋ฐฑ)")
else:
# ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”์ถœ
response = full_text.replace(formatted_prompt, "").strip() if formatted_prompt else full_text
print(f"๐Ÿ” [DEBUG] ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”์ถœ ์‚ฌ์šฉ")
print(f"๐Ÿ” [DEBUG] ์ถ”์ถœ๋œ ์‘๋‹ต ๊ธธ์ด: {len(response)}")
print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์‘๋‹ต: {response}")
t_decode_end = time.time()
print(f"๐Ÿ” [DEBUG] ์‘๋‹ต ์ถ”์ถœ ์™„๋ฃŒ - ์†Œ์š”์‹œ๊ฐ„: {t_decode_end - t_decode_start:.3f}์ดˆ")
except Exception as e:
print(f"โŒ [DEBUG] ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์—๋Ÿฌ ๋ฐœ์ƒ: {str(e)}")
import traceback
traceback.print_exc()
return {"error": f"Response extraction failed: {str(e)}"}
# --- 7. ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜ ---
total_time = time.time() - t_tok_start
print(f"๐Ÿ” [DEBUG] ์ „์ฒด ์ฒ˜๋ฆฌ ์™„๋ฃŒ - ์ด ์†Œ์š”์‹œ๊ฐ„: {total_time:.3f}์ดˆ")
# ๐Ÿ”„ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์™„๋ฃŒ (์ „์—ญ ๋ณ€์ˆ˜ ์ดˆ๊ธฐํ™”๋Š” ์ œ๊ฑฐ๋จ)
return {
"generated_text": response,
"processing_time": total_time,
"model_name": current_profile.display_name,
"image_processed": image_processed,
"tokens_generated": generated_ids.shape[1] - input_ids.shape[1],
"total_tokens": generated_ids.shape[1]
}
except Exception as e:
print(f"โŒ [DEBUG] generate_sync ์ „์ฒด ์—๋Ÿฌ: {str(e)}")
import traceback
traceback.print_exc()
return {"error": str(e)}
@app.get("/lora/status")
async def get_lora_status():
    """Report whether LoRA support is usable and which adapter is active.

    Returns a dict with ``status`` set to ``"success"`` plus the adapter
    name, base-model flag and device, or ``status`` set to ``"error"`` with
    a message when LoRA is unavailable or an exception occurs.
    """
    try:
        lora_usable = LORA_AVAILABLE and lora_manager is not None
        if not lora_usable:
            return {"status": "error", "message": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค"}

        # Attributes are probed defensively: a missing attribute is reported
        # as "no adapter" / "no base model" rather than as an error.
        active_adapter = getattr(lora_manager, 'current_adapter_name', None)
        has_base_model = getattr(lora_manager, 'base_model', None) is not None
        device_name = getattr(lora_manager, 'device', 'unknown')

        return {
            "status": "success",
            "lora_available": True,
            "current_adapter": active_adapter,
            "base_model_loaded": has_base_model,
            "device": device_name
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.get("/context/status")
async def get_context_status():
    """Summarise the context manager: per-session counts and its limits.

    For every session the response lists the number of turns and how many
    of them have role ``"user"`` or ``"assistant"``. Any failure is returned
    as an ``{"status": "error", ...}`` dict rather than raised.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        per_session = {}
        for sid, turns in context_manager.session_conversations.items():
            roles = [turn.role for turn in turns]
            per_session[sid] = {
                "turns": len(turns),
                "user_messages": roles.count("user"),
                "assistant_messages": roles.count("assistant")
            }

        session_total = len(context_manager.session_conversations)
        return {
            "status": "success",
            "context_manager_available": True,
            "total_sessions": session_total,
            "sessions": per_session,
            "max_tokens": context_manager.max_tokens,
            "max_turns": context_manager.max_turns,
            "strategy": context_manager.strategy
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.get("/context/history")
async def get_context_history(session_id: str = None):
    """Return the stored conversation context, optionally for one session.

    Args:
        session_id: When given, only that session's context and summary are
            returned; otherwise the manager's global history is reported.

    Returns:
        A dict with ``status`` and the requested context, or an error dict
        when the context manager is missing or a call fails.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        if not session_id:
            # No session requested: fall back to the global history.
            global_context = context_manager.get_context(include_system=True, max_length=4000)
            return {
                "status": "success",
                "context": global_context,
                "history_length": len(context_manager.conversation_history),
                "all_sessions": True
            }

        scoped_context = context_manager.get_context(include_system=True, max_length=4000, session_id=session_id)
        summary = context_manager.get_context_summary(session_id)
        return {
            "status": "success",
            "session_id": session_id,
            "context": scoped_context,
            "history_length": summary.get("total_turns", 0),
            "session_summary": summary
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.get("/context/auto-cleanup")
async def get_auto_cleanup_config():
    """Return the context manager's current auto-cleanup configuration.

    Errors (including a missing context manager) are reported in the
    response body instead of being raised.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        current_config = context_manager.get_auto_cleanup_config()
        return {"status": "success", "auto_cleanup_config": current_config}
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.post("/context/auto-cleanup")
async def set_auto_cleanup_config(
    enabled: bool = Form(True),
    interval_turns: int = Form(8),
    interval_time: int = Form(300),
    strategy: str = Form("smart")
):
    """Update the auto-cleanup settings and echo the resulting configuration.

    Args:
        enabled: Forwarded to the context manager as ``enabled``.
        interval_turns: Forwarded as ``interval_turns``.
        interval_time: Forwarded as ``interval_time``.
        strategy: Forwarded as ``strategy``.

    Returns:
        A success dict containing the configuration read back from the
        manager, or an error dict on failure.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        new_settings = {
            "enabled": enabled,
            "interval_turns": interval_turns,
            "interval_time": interval_time,
            "strategy": strategy,
        }
        context_manager.set_auto_cleanup_config(**new_settings)

        # Read the config back so the caller sees what the manager stored.
        applied = context_manager.get_auto_cleanup_config()
        return {
            "status": "success",
            "message": "์ž๋™ ์ •๋ฆฌ ์„ค์ •์ด ์—…๋ฐ์ดํŠธ๋˜์—ˆ์Šต๋‹ˆ๋‹ค",
            "new_config": applied
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.post("/context/cleanup/{session_id}")
async def manual_cleanup_session(session_id: str):
    """Trigger the context manager's cleanup routine for one session.

    Args:
        session_id: Session identifier taken from the URL path.

    Returns:
        A success dict echoing ``session_id``, or an error dict on failure.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        # Reuses the manager's auto-cleanup routine for an on-demand run.
        context_manager._execute_auto_cleanup(session_id)

        done_message = f"์„ธ์…˜ {session_id} ์ˆ˜๋™ ์ •๋ฆฌ ์™„๋ฃŒ"
        return {
            "status": "success",
            "message": done_message,
            "session_id": session_id
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.post("/context/cleanup-all")
async def manual_cleanup_all_sessions():
    """Trigger the context manager's cleanup routine for every known session.

    Returns:
        A success dict once all sessions were processed, or an error dict
        when the context manager is missing or a cleanup call fails.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        # Iterate over a snapshot of the session ids: the cleanup routine is
        # opaque from here and may add or drop sessions, which would raise
        # "dictionary changed size during iteration" on the live mapping.
        for session_id in list(context_manager.session_conversations):
            context_manager._execute_auto_cleanup(session_id)

        return {
            "status": "success",
            "message": "๋ชจ๋“  ์„ธ์…˜ ์ˆ˜๋™ ์ •๋ฆฌ ์™„๋ฃŒ"
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.post("/api/v2/generate", response_model=GenerateResponse)
async def generate(request: Request,
                   prompt: str = Form(...),
                   image1: UploadFile = File(None),
                   image2: UploadFile = File(None),
                   image3: UploadFile = File(None),
                   image4: UploadFile = File(None),
                   user_id: str = Form("anonymous"),
                   room_id: str = Form("default"),
                   use_context: bool = Form(True),
                   session_id: str = Form(None)):
    """Generate a reply for ``prompt`` with up to four optional images.

    Args:
        request: Incoming request (not read by this handler).
        prompt: User prompt text.
        image1, image2, image3, image4: Optional uploads; each one that can
            be read is passed to ``generate_sync`` as raw bytes.
        user_id: Caller identifier, used in the auto-generated session id.
        room_id: Chat-room identifier, used in the auto-generated session id.
        use_context: When true, the prompt and the reply are recorded in the
            context manager (if one is available).
        session_id: Session to record under; generated from room, user and
            the current timestamp when omitted.

    Returns:
        ``GenerateResponse`` built from the ``generate_sync`` result.

    Raises:
        HTTPException: 503 when no model is loaded; 500 when generation
            reports an error or raises.
    """
    if not model_loaded:
        raise HTTPException(status_code=503, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

    if not session_id:
        # Derive a session id from room + user + timestamp.
        timestamp = int(time.time())
        session_id = f"room_{room_id}_user_{user_id}_{timestamp}"
        print(f"๐Ÿ” [DEBUG] ์ž๋™ ์„ธ์…˜ ID ์ƒ์„ฑ: {session_id} (์ฑ„ํŒ…๋ฐฉ: {room_id}, ์‚ฌ์šฉ์ž: {user_id})")

    # Only touch the context manager when it actually exists; the other
    # context endpoints treat a missing manager as a normal condition.
    track_context = use_context and context_manager is not None
    if track_context:
        context_manager.add_user_message(prompt, metadata={"session_id": session_id})
        print(f"๐Ÿ” [DEBUG] ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€๋จ (์„ธ์…˜: {session_id})")

    # Best effort: an unreadable upload is logged and skipped.
    image_data_list = []
    for img_file in (image1, image2, image3, image4):
        if not img_file:
            continue
        try:
            image_data_list.append(await img_file.read())
        except Exception as e:
            logger.warning(f"์ด๋ฏธ์ง€ ๋กœ๋“œ ์‹คํŒจ: {e}")

    try:
        # NOTE(review): generate_sync is called inline, so this handler
        # blocks the event loop for the duration of generation.
        result = generate_sync(prompt, image_data_list, use_context=use_context, session_id=session_id, user_id=user_id, room_id=room_id)
        if "error" in result:
            raise HTTPException(status_code=500, detail=result["error"])
        if track_context:
            context_manager.add_assistant_message(result["generated_text"], metadata={"session_id": session_id})
        return GenerateResponse(
            generated_text=result["generated_text"],
            processing_time=result["processing_time"],
            model_name=result["model_name"],
            image_processed=result["image_processed"]
        )
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap its detail.
        raise
    except Exception as e:
        logger.error(f"โŒ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"๋ชจ๋ธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
@app.post("/generate-multimodal", response_model=MultimodalGenerateResponse)
async def generate_multimodal(prompt: str = Form(...),
                              image: UploadFile = File(None),
                              model_id: Optional[str] = Form(None),
                              max_length: Optional[int] = Form(None),
                              temperature: Optional[float] = Form(None),
                              top_p: Optional[float] = Form(None),
                              do_sample: Optional[bool] = Form(None)):
    """Run a manual greedy decode (at most 64 new tokens) for a prompt and optional image.

    Args:
        prompt: User prompt text.
        image: Optional upload; decoded to an RGB PIL image. A decode failure
            is logged and the request continues as text-only.
        model_id: Only echoed back in the response; no model switch happens.
        max_length, temperature, top_p, do_sample: Accepted for interface
            compatibility but not used by the greedy loop below.

    Returns:
        ``MultimodalGenerateResponse`` with the decoded text and timing.

    Raises:
        HTTPException: 500 when no model is loaded or generation fails.
    """
    global model_loaded, current_profile, model, tokenizer, processor
    if not model_loaded:
        raise HTTPException(status_code=500, detail="๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค")
    start_time = time.time()

    pil_image = None
    if image:
        try:
            data = await image.read()
            pil_image = Image.open(io.BytesIO(data)).convert("RGB")
        except Exception as e:
            logger.error(f"์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")

    try:
        image_list = [pil_image] if pil_image else []
        conv = []
        if image_list:
            # One "<image>" placeholder turn per attached image.
            conv.append({"role": "user", "content": " ".join(["<image>"] * len(image_list))})
        conv.append({"role": "user", "content": prompt})

        logger.info("=== STEP 1: building sample ===")
        # Use the image and conversation prepared above. The sample was
        # previously hard-coded to an empty image list, so the upload was
        # dropped while the response still reported image_processed=True.
        sample = {"image": image_list, "conv": conv}

        logger.info("=== STEP 2: calling processor ===")
        inputs = processor.batch_encode_collate([sample], padding_side='left', add_generation_prompt=True)
        logger.info("=== STEP 3: processor returned ===")
        for k, v in inputs.items():
            if isinstance(v, torch.Tensor):
                logger.info(f"Key {k}: tensor shape {v.shape}, dtype {v.dtype}, device {v.device}")
            else:
                logger.info(f"Key {k}: {type(v)}")

        logger.info("=== STEP 4: moving to device ===")
        inputs = {k: (v.to(model.device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
        logger.info("=== STEP 5: moved to device ===")

        eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")

        # Manual greedy decoding loop. It relies on the module-level
        # ``torch`` import: a function-local ``import torch`` would make the
        # name local to the whole function and break the earlier
        # ``torch.Tensor`` checks with UnboundLocalError.
        generated = inputs["input_ids"].clone()
        with torch.no_grad():
            for _ in range(64):
                out = model(**inputs)
                next_token = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)
                generated = torch.cat([generated, next_token], dim=-1)
                token_id = next_token.item()
                logger.info(f"Step token: {token_id}")
                if token_id == eot_id:
                    break
                inputs["input_ids"] = generated
                # Keep the mask the same length as the growing input_ids;
                # padding is on the left, so new positions are real tokens.
                mask = inputs.get("attention_mask")
                if isinstance(mask, torch.Tensor) and mask.dim() == 2:
                    inputs["attention_mask"] = torch.cat(
                        [mask, mask.new_ones((mask.shape[0], 1))], dim=-1
                    )

        logger.info(f"Final Generated IDs: {generated[0].tolist()}")
        generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
        if "<|im_start|>assistant" in generated_text:
            response = generated_text.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
        else:
            response = generated_text.strip()

        processing_time = time.time() - start_time
        return MultimodalGenerateResponse(generated_text=response,
                                          processing_time=processing_time,
                                          model_name=current_profile.display_name,
                                          model_id=model_id or current_profile.get_model_info().get("model_name"),
                                          image_processed=bool(pil_image))
    except Exception as e:
        logger.error(f"โŒ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        raise HTTPException(status_code=500, detail=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ ์‹คํŒจ: {str(e)}")
@app.get("/api/v2/models")
async def list_models():
    """List the available models together with the currently active one.

    ``current_model`` is ``None`` when no profile is loaded.
    """
    available = list_available_models()
    active_info = current_profile.get_model_info() if current_profile else None
    return {"models": available, "current_model": active_info}
@app.post("/switch-model")
async def switch_model(model_id: str):
    """Load the model identified by ``model_id`` and report the active profile.

    Args:
        model_id: Identifier handed to ``load_model_async``.

    Raises:
        HTTPException: 500 when loading, or reading the new profile, fails.
    """
    try:
        await load_model_async(model_id)
        success_message = f"๋ชจ๋ธ ๋ณ€๊ฒฝ ์„ฑ๊ณต: {model_id}"
        active_name = current_profile.display_name
        return {"message": success_message, "current_model": active_name}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"๋ชจ๋ธ ๋ณ€๊ฒฝ ์‹คํŒจ: {str(e)}")
@app.get("/", response_model=dict)
async def root():
    """Root endpoint: service banner, version, active model and docs path."""
    if current_profile:
        active_name = current_profile.display_name
    else:
        active_name = "None"
    return {
        "message": "Lily LLM API v2 ์„œ๋ฒ„",
        "version": "2.0.0",
        "current_model": active_name,
        "docs": "/docs"
    }
@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health-check endpoint.

    Always reports ``status="healthy"``; whether a model is loaded is
    conveyed separately through ``model_loaded`` and ``current_model``.
    """
    model_ids = list_available_models()
    if current_profile:
        active_name = current_profile.display_name
    else:
        active_name = "None"
    return HealthResponse(
        status="healthy",
        model_loaded=model_loaded,
        current_model=active_name,
        available_models=model_ids
    )
@app.post("/document/upload", response_model=DocumentUploadResponse)
async def upload_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),  # default user id
    room_id: str = Form("default"),  # chat-room id
    document_id: Optional[str] = Form(None)  # document id (auto-generated when omitted)
):
    """Store an uploaded document through the RAG processor.

    The upload is written to a temporary file in the working directory,
    handed to ``rag_processor.process_and_store_document`` and removed again
    whether or not processing succeeds. On success the document metadata is
    also registered with the integrated memory manager (best effort).

    Args:
        file: Uploaded document.
        user_id: Owner of the document.
        room_id: Chat room the document is attached to.
        document_id: Optional id; an 8-character uuid prefix is generated
            when it is missing.

    Returns:
        ``DocumentUploadResponse``; failures are reported with
        ``success=False`` and ``error`` set rather than raised.
    """
    start_time = time.time()
    try:
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # The filename and document id come from the client. Keep only the
        # final path component so the temp file cannot land outside the
        # working directory.
        safe_name = os.path.basename(str(file.filename or "").replace("\\", "/")) or "upload"
        safe_id = os.path.basename(str(document_id).replace("\\", "/")) or "doc"
        temp_file_path = f"./temp_{safe_id}_{safe_name}"

        content = await file.read()
        try:
            with open(temp_file_path, "wb") as f:
                f.write(content)
            # Process the document and store it in the vector store.
            result = rag_processor.process_and_store_document(
                user_id, document_id, temp_file_path
            )
        finally:
            # Always remove the temp file, including when processing raises.
            if os.path.exists(temp_file_path):
                os.remove(temp_file_path)

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ“„ ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")

        if result["success"]:
            # Memory-system bookkeeping is best effort: a failure here is
            # logged and does not fail the upload.
            try:
                chunks = result.get("chunks", [])
                chunk_count = len(chunks) if isinstance(chunks, list) else 0
                document_info = {
                    "document_id": document_id,
                    "filename": file.filename,
                    "uploaded_by": user_id,
                    "document_type": file.filename.split('.')[-1].lower() if '.' in file.filename else "unknown",
                    "page_count": result.get("page_count", 0),
                    "chunk_count": chunk_count,
                    "summary": result.get("message", "")
                }
                integrated_memory_manager.add_document_to_room(room_id, document_info)
                integrated_memory_manager.record_conversation(
                    user_id, room_id,
                    topic=f"๋ฌธ์„œ ์—…๋กœ๋“œ: {file.filename}"
                )
                logger.info(f"โœ… ๋ฉ”๋ชจ๋ฆฌ ์‹œ์Šคํ…œ์— ๋ฌธ์„œ ์ •๋ณด ์ถ”๊ฐ€ ์™„๋ฃŒ: {room_id} - {file.filename}")
            except Exception as e:
                logger.warning(f"โš ๏ธ ๋ฉ”๋ชจ๋ฆฌ ์‹œ์Šคํ…œ ์—…๋ฐ์ดํŠธ ์‹คํŒจ: {e}")

            # No automatic AI summary after upload; a canned acknowledgement
            # is returned instead.
            result["auto_response"] = f"๋ฌธ์„œ '{file.filename}' ์—…๋กœ๋“œ ์™„๋ฃŒ! ์ด์ œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”."
            logger.info(f"๐Ÿ“„ ์ž๋™ AI ์‘๋‹ต ์ƒ์„ฑ ๊ฑด๋„ˆ๋›ฐ๊ธฐ - AI ๋ฆฌ์†Œ์Šค ์ ˆ์•ฝ (์‚ฌ์šฉ์ž ์งˆ๋ฌธ ์‹œ์—๋งŒ AI ์‘๋‹ต)")
        else:
            result["auto_response"] = "๋ฌธ์„œ ์—…๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")
        )
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id or "unknown",
            message="๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            error=str(e)
        )
@app.post("/summarize/conversation")
async def summarize_conversation(
    room_id: str = Form("default"),
    user_id: str = Form("anonymous"),
    max_length: int = Form(300)
):
    """Create a conversation summary for a chat room via the summarizer.

    Args:
        room_id: Room whose conversation is summarised.
        user_id: Accepted but not used by this handler.
        max_length: Passed to ``create_smart_conversation_summary``.

    Returns:
        A dict with ``success`` and ``message``; on success it also carries
        the room's stored summary and key topics.
    """
    try:
        if not text_summarizer.is_available():
            return {
                "success": False,
                "message": "summarizers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            }

        summarized = integrated_memory_manager.create_smart_conversation_summary(
            room_id, max_length
        )
        if not summarized:
            return {
                "success": False,
                "message": "๋Œ€ํ™” ์š”์•ฝ ์ƒ์„ฑ ์‹คํŒจ"
            }

        # Re-read the room context to pick up the freshly stored summary.
        room_context = integrated_memory_manager.room_context_manager.get_room_context(room_id)
        summary_text = room_context.conversation_summary if room_context else ""
        topics = room_context.key_topics if room_context else []
        return {
            "success": True,
            "message": "๋Œ€ํ™” ์š”์•ฝ ์ƒ์„ฑ ์™„๋ฃŒ",
            "summary": summary_text,
            "key_topics": topics,
            "room_id": room_id
        }
    except Exception as e:
        logger.error(f"โŒ ๋Œ€ํ™” ์š”์•ฝ ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "message": f"๋Œ€ํ™” ์š”์•ฝ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
        }
@app.post("/summarize/text")
async def summarize_text(
    text: str = Form(...),
    max_length: int = Form(200),
    model_name: str = Form("kobart")
):
    """Summarize a block of text with the configured summarizer.

    Args:
        text: Text to summarize; needs at least 50 characters after stripping.
        max_length: Upper summary length; the lower bound is half of it.
        model_name: Model identifier passed to ``text_summarizer``.

    Returns:
        A dict with ``success`` and ``message``. On success it also carries
        the summary, both lengths, the compression ratio (rounded to two
        decimals) and the model used. Errors are reported in the payload.
    """
    try:
        if not text_summarizer.is_available():
            return {
                "success": False,
                "message": "summarizers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            }

        # Inputs shorter than 50 stripped characters are rejected up front.
        stripped_length = len(text.strip()) if text else 0
        if stripped_length < 50:
            return {
                "success": False,
                "message": "์š”์•ฝํ•  ํ…์ŠคํŠธ๊ฐ€ ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค (์ตœ์†Œ 50์ž ํ•„์š”)"
            }

        summary_options = {
            "max_length": max_length,
            "min_length": max_length // 2,
            "do_sample": False,
            "temperature": 0.7,
            "top_p": 0.9,
        }
        config = SummaryConfig(**summary_options)

        summary = text_summarizer.summarize_text(text, model_name, config)
        if not summary:
            return {
                "success": False,
                "message": "์š”์•ฝ ์ƒ์„ฑ ์‹คํŒจ"
            }

        original_length = len(text)
        summary_length = len(summary)
        return {
            "success": True,
            "message": "ํ…์ŠคํŠธ ์š”์•ฝ ์™„๋ฃŒ",
            "original_length": original_length,
            "summary_length": summary_length,
            "compression_ratio": round(summary_length / original_length, 2),
            "summary": summary,
            "model_used": model_name
        }
    except Exception as e:
        logger.error(f"โŒ ํ…์ŠคํŠธ ์š”์•ฝ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "message": f"ํ…์ŠคํŠธ ์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
        }
@app.post("/compress/context")
async def compress_context(
    room_id: str = Form("default"),
    target_length: int = Form(800)
):
    """Ask the memory manager to compress a chat room's context.

    Args:
        room_id: Room whose context is compressed.
        target_length: Target size forwarded to ``compress_room_context``.

    Returns:
        A dict with ``success`` and ``message``; on success it echoes
        ``room_id`` and ``target_length``. Errors are reported in the payload.
    """
    try:
        if not text_summarizer.is_available():
            return {
                "success": False,
                "message": "summarizers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            }

        compressed = integrated_memory_manager.compress_room_context(room_id, target_length)
        if not compressed:
            return {
                "success": False,
                "message": "์ปจํ…์ŠคํŠธ ์••์ถ• ์‹คํŒจ"
            }

        return {
            "success": True,
            "message": "์ปจํ…์ŠคํŠธ ์••์ถ• ์™„๋ฃŒ",
            "room_id": room_id,
            "target_length": target_length
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹คํŒจ: {e}")
        return {
            "success": False,
            "message": f"์ปจํ…์ŠคํŠธ ์••์ถ• ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
        }
@app.get("/summarizer/status")
async def get_summarizer_status():
    """Report whether the summarizer backend is usable and which models it lists.

    Returns:
        On success, a dict with ``summarizers_available``, ``available_models``
        (empty when unavailable) and ``default_model`` (``None`` when
        unavailable). On error, ``success`` is ``False`` with a message.
    """
    try:
        available = text_summarizer.is_available()
        if available:
            models = text_summarizer.get_available_models()
            default_model = "hyunwoongko/kobart"
        else:
            models = []
            default_model = None

        return {
            "success": True,
            "summarizers_available": available,
            "available_models": models,
            "default_model": default_model
        }
    except Exception as e:
        logger.error(f"โŒ summarizer ์ƒํƒœ ํ™•์ธ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "message": f"์ƒํƒœ ํ™•์ธ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
        }
@app.post("/rag/generate", response_model=RAGResponse)
async def generate_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Answer ``query`` against a stored document through ``rag_processor``.

    The module-level ``model`` is forwarded as the LLM only when it is set and
    exposes ``generate_text``; otherwise ``llm_model=None`` is passed.

    Args:
        query: User question.
        user_id: Owner of the document.
        document_id: Document to search.
        max_length: Accepted as a form field; not forwarded by this handler.
        temperature: Accepted as a form field; not forwarded by this handler.
        top_p: Accepted as a form field; not forwarded by this handler.
        do_sample: Accepted as a form field; not forwarded by this handler.

    Returns:
        A ``RAGResponse``. Failures are reported with ``success=False`` and
        the elapsed time, never raised.
    """
    start_time = time.time()
    try:
        llm_model = None
        if model is not None and hasattr(model, 'generate_text'):
            llm_model = model
            logger.info("โœ… ๋กœ๋“œ๋œ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ RAG ์‘๋‹ต ์ƒ์„ฑ")
        else:
            logger.warning("โš ๏ธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•„ ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‘๋‹ต๋งŒ ์ƒ์„ฑ")

        result = rag_processor.generate_rag_response(
            user_id, document_id, query, llm_model=llm_model
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ” RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )
    except Exception as e:
        logger.error(f"โŒ RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return RAGResponse(
            success=False,
            response=f"RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            # Report real elapsed time, as the other RAG handlers do,
            # instead of a hard-coded 0.0.
            processing_time=time.time() - start_time
        )
@app.post("/rag/generate-hybrid", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    image1: UploadFile = File(None),
    image2: UploadFile = File(None),
    image3: UploadFile = File(None),
    image4: UploadFile = File(None),
    image5: UploadFile = File(None),
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Answer ``query`` against a document plus up to five uploaded images.

    Each provided image is written to a temporary file whose path is handed
    to ``rag_processor.generate_rag_response``. The temporary files are
    removed in a ``finally`` block, so they are cleaned up even when the RAG
    call raises.

    Args:
        query: User question.
        user_id: Owner of the document.
        document_id: Document to search.
        image1..image5: Optional image uploads.
        max_length: Accepted as a form field; not forwarded by this handler.
        temperature: Accepted as a form field; not forwarded by this handler.
        top_p: Accepted as a form field; not forwarded by this handler.
        do_sample: Accepted as a form field; not forwarded by this handler.

    Returns:
        A ``RAGResponse``. Failures are reported with ``success=False``.
    """
    import tempfile

    def _remove_temp(temp_file):
        # Best-effort delete: a failed cleanup is logged, never raised.
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
                logger.info(f"๐Ÿ—‘๏ธ ์ž„์‹œ ์ด๋ฏธ์ง€ ํŒŒ์ผ ์‚ญ์ œ: {temp_file}")
        except Exception as e:
            logger.warning(f"โš ๏ธ ์ž„์‹œ ํŒŒ์ผ ์‚ญ์ œ ์‹คํŒจ: {e}")

    start_time = time.time()
    image_files = []
    try:
        for i, img in enumerate((image1, image2, image3, image4, image5)):
            if not img:
                continue
            temp_path = None
            try:
                # The suffix is always ".png" regardless of the uploaded type.
                with tempfile.NamedTemporaryFile(
                    suffix=f"_{i}.png",
                    delete=False,
                    prefix="hybrid_image_"
                ) as temp_file:
                    temp_path = temp_file.name
                    image_data = await img.read()
                    temp_file.write(image_data)
                    logger.info(f"๐Ÿ“ธ ์ด๋ฏธ์ง€ ์—…๋กœ๋“œ: {img.filename} -> {temp_file.name}")
                image_files.append(temp_path)
            except Exception as e:
                logger.error(f"โŒ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
                # A half-written file must not stay on disk or reach the processor.
                if temp_path:
                    _remove_temp(temp_path)

        result = rag_processor.generate_rag_response(
            user_id, document_id, query,
            llm_model=model,
            image_files=image_files if image_files else None
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ” ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")

        return RAGResponse(
            success=result["success"],
            response=result["response"],
            context=result["context"],
            sources=result["sources"],
            search_results=result["search_results"],
            processing_time=processing_time
        )
    except Exception as e:
        logger.error(f"โŒ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return RAGResponse(
            success=False,
            response=f"์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )
    finally:
        # Runs on success and failure alike so temp images never accumulate.
        for temp_file in image_files:
            _remove_temp(temp_file)
@app.get("/documents/{user_id}")
async def list_user_documents(user_id: str):
    """Return whatever the vector store lists as documents for ``user_id``.

    On error, returns an empty listing with the error text attached.
    """
    try:
        # Imported here, as in the rest of this handler's history, not at module top.
        from lily_llm_core.vector_store_manager import vector_store_manager
        return vector_store_manager.get_all_documents(user_id)
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์‹คํŒจ: {e}")
        empty_listing = {"documents": [], "total_docs": 0, "error": str(e)}
        return empty_listing
@app.delete("/document/{user_id}/{document_id}")
async def delete_document(user_id: str, document_id: str):
    """Delete a document via ``rag_processor`` and return its result as-is.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        return rag_processor.delete_document(user_id, document_id)
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
        failure = {"success": False, "error": str(e)}
        return failure
# User management endpoints
@app.post("/user/create", response_model=UserResponse)
async def create_user(
    user_id: str = Form(...),
    username: Optional[str] = Form(None),
    email: Optional[str] = Form(None)
):
    """Create a user record and return the stored profile.

    Args:
        user_id: Identifier for the new user.
        username: Optional display name.
        email: Optional e-mail address.

    Returns:
        A ``UserResponse``. After a successful insert the profile is read
        back; if that lookup is empty, the profile fields are ``None``.
    """
    try:
        if not db_manager.add_user(user_id, username, email):
            return UserResponse(success=False, user_id=user_id, error="์‚ฌ์šฉ์ž ์ƒ์„ฑ ์‹คํŒจ")

        user_info = db_manager.get_user(user_id)
        # An empty lookup falls back to an empty mapping so every field is None.
        profile = user_info if user_info else {}
        return UserResponse(
            success=True,
            user_id=user_id,
            username=profile.get('username'),
            email=profile.get('email'),
            created_at=profile.get('created_at')
        )
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))
@app.get("/user/{user_id}", response_model=UserResponse)
async def get_user_info(user_id: str):
    """Look up a user and return the profile fields.

    Returns:
        A ``UserResponse``; ``success=False`` with an error text when the
        user is not found or the lookup raises.
    """
    try:
        user_info = db_manager.get_user(user_id)
        if not user_info:
            return UserResponse(success=False, user_id=user_id, error="์‚ฌ์šฉ์ž๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")

        return UserResponse(
            success=True,
            user_id=user_id,
            username=user_info.get('username'),
            email=user_info.get('email'),
            created_at=user_info.get('created_at')
        )
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return UserResponse(success=False, user_id=user_id, error=str(e))
# Session management endpoints
@app.post("/session/create", response_model=SessionResponse)
async def create_session(
    user_id: str = Form(...),
    session_name: Optional[str] = Form(None)
):
    """Create a chat session for a user.

    Args:
        user_id: Owner of the session.
        session_name: Optional name, echoed back on success.

    Returns:
        A ``SessionResponse``; on failure ``session_id`` is an empty string.
    """
    try:
        session_id = db_manager.create_chat_session(user_id, session_name)
        if not session_id:
            return SessionResponse(success=False, session_id="", error="์„ธ์…˜ ์ƒ์„ฑ ์‹คํŒจ")

        return SessionResponse(
            success=True,
            session_id=session_id,
            session_name=session_name
        )
    except Exception as e:
        logger.error(f"โŒ ์„ธ์…˜ ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return SessionResponse(success=False, session_id="", error=str(e))
@app.get("/sessions/{user_id}")
async def list_user_sessions(user_id: str):
    """List a user's chat sessions together with their count.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        sessions = db_manager.get_user_sessions(user_id)
        session_count = len(sessions)
        return {
            "success": True,
            "user_id": user_id,
            "sessions": sessions,
            "total_sessions": session_count
        }
    except Exception as e:
        logger.error(f"โŒ ์„ธ์…˜ ๋ชฉ๋ก ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return {"success": False, "error": str(e)}
# Chat message endpoints
@app.post("/chat/message", response_model=ChatMessageResponse)
async def add_chat_message(
    session_id: str = Form(...),
    user_id: str = Form(...),
    message_type: str = Form(...),
    content: str = Form(...)
):
    """Store one chat message and echo it back.

    Args:
        session_id: Session the message belongs to.
        user_id: Author of the message.
        message_type: Message type string stored with the message.
        content: Message text.

    Returns:
        A ``ChatMessageResponse``. ``message_id`` is always 0 here because the
        stored ID is not read back; ``timestamp`` is the local time at which
        the response is built.
    """
    def _failed(error_text):
        # Both failure paths share the same empty payload; only the error differs.
        return ChatMessageResponse(
            success=False,
            message_id=0,
            content="",
            message_type="",
            timestamp="",
            error=error_text
        )

    try:
        stored = db_manager.add_chat_message(session_id, user_id, message_type, content)
        if not stored:
            return _failed("๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์‹คํŒจ")

        return ChatMessageResponse(
            success=True,
            message_id=0,
            content=content,
            message_type=message_type,
            timestamp=datetime.now().isoformat()
        )
    except Exception as e:
        logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์˜ค๋ฅ˜: {e}")
        return _failed(str(e))
@app.get("/chat/history/{session_id}")
async def get_chat_history(session_id: str, limit: int = 50):
    """Return up to ``limit`` stored messages for a session, with their count.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        messages = db_manager.get_chat_history(session_id, limit)
        message_count = len(messages)
        return {
            "success": True,
            "session_id": session_id,
            "messages": messages,
            "total_messages": message_count
        }
    except Exception as e:
        logger.error(f"โŒ ์ฑ„ํŒ… ํžˆ์Šคํ† ๋ฆฌ ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return {"success": False, "error": str(e)}
# Document management endpoints (DB-backed)
@app.get("/documents/db/{user_id}")
async def list_user_documents_db(user_id: str):
    """List a user's documents as recorded by ``db_manager``, with their count.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        documents = db_manager.get_user_documents(user_id)
        document_count = len(documents)
        return {
            "success": True,
            "user_id": user_id,
            "documents": documents,
            "total_documents": document_count
        }
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ๋ชฉ๋ก ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return {"success": False, "error": str(e)}
# Authentication endpoints
@app.post("/auth/login", response_model=LoginResponse)
async def login(
    user_id: str = Form(...),
    password: str = Form(...)
):
    """Log a user in and issue access/refresh tokens.

    Args:
        user_id: Account identifier.
        password: Password supplied by the client.

    Returns:
        A ``LoginResponse`` with tokens on success, or ``success=False`` with
        an error text when the user is unknown, the password check fails, or
        an exception occurs.
    """
    try:
        user_info = db_manager.get_user(user_id)
        if not user_info:
            return LoginResponse(success=False, error="์‚ฌ์šฉ์ž๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค")

        # NOTE(review): the password is verified against the constant
        # "dummy_hash", not a hash stored for this user. This needs replacing
        # with a per-user hash lookup before real use.
        password_ok = auth_manager.verify_password(password, "dummy_hash")
        if not password_ok:
            return LoginResponse(success=False, error="๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค")

        username = user_info.get('username')
        tokens = auth_manager.create_user_tokens(user_id, username)
        return LoginResponse(
            success=True,
            access_token=tokens['access_token'],
            refresh_token=tokens['refresh_token'],
            token_type=tokens['token_type'],
            user_id=user_id,
            username=username
        )
    except Exception as e:
        logger.error(f"โŒ ๋กœ๊ทธ์ธ ์˜ค๋ฅ˜: {e}")
        return LoginResponse(success=False, error=str(e))
@app.post("/auth/refresh", response_model=TokenResponse)
async def refresh_token(refresh_token: str = Form(...)):
    """Exchange a refresh token for a new access token.

    Returns:
        A ``TokenResponse`` with a bearer access token, or ``success=False``
        when ``auth_manager`` returns nothing or raises.
    """
    try:
        new_access_token = auth_manager.refresh_access_token(refresh_token)
        if not new_access_token:
            return TokenResponse(success=False, error="์œ ํšจํ•˜์ง€ ์•Š์€ ๋ฆฌํ”„๋ ˆ์‹œ ํ† ํฐ์ž…๋‹ˆ๋‹ค")

        return TokenResponse(
            success=True,
            access_token=new_access_token,
            token_type="bearer"
        )
    except Exception as e:
        logger.error(f"โŒ ํ† ํฐ ๊ฐฑ์‹  ์˜ค๋ฅ˜: {e}")
        return TokenResponse(success=False, error=str(e))
@app.post("/auth/register", response_model=LoginResponse)
async def register(
    user_id: str = Form(...),
    username: str = Form(...),
    password: str = Form(...),
    email: Optional[str] = Form(None)
):
    """Register a new user and immediately issue tokens.

    Args:
        user_id: Identifier for the new account; must not already exist.
        username: Display name.
        password: Password supplied by the client.
        email: Optional e-mail address.

    Returns:
        A ``LoginResponse`` with tokens on success, or ``success=False`` with
        an error text when the ID is taken, the insert fails, or an exception
        occurs.
    """
    try:
        if db_manager.get_user(user_id):
            return LoginResponse(success=False, error="์ด๋ฏธ ์กด์žฌํ•˜๋Š” ์‚ฌ์šฉ์ž ID์ž…๋‹ˆ๋‹ค")

        # NOTE(review): the hash is computed but never passed to add_user(),
        # so no password is persisted for the account.
        hashed_password = auth_manager.hash_password(password)

        created = db_manager.add_user(user_id, username, email)
        if not created:
            return LoginResponse(success=False, error="์‚ฌ์šฉ์ž ๋“ฑ๋ก์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค")

        tokens = auth_manager.create_user_tokens(user_id, username)
        return LoginResponse(
            success=True,
            access_token=tokens['access_token'],
            refresh_token=tokens['refresh_token'],
            token_type=tokens['token_type'],
            user_id=user_id,
            username=username
        )
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ๋“ฑ๋ก ์˜ค๋ฅ˜: {e}")
        return LoginResponse(success=False, error=str(e))
@app.get("/auth/me")
async def get_current_user_info(credentials: HTTPAuthorizationCredentials = Depends(auth_manager.security)):
    """Return the identity fields carried by the caller's credentials.

    Returns:
        A dict with ``user_id`` (the ``sub`` field), ``username`` and
        ``token_type`` (the ``type`` field). Any exception from
        ``auth_manager.get_current_user`` is turned into
        ``{"success": False, "error": ...}``.
    """
    try:
        claims = auth_manager.get_current_user(credentials)
        user_id = claims.get("sub")
        username = claims.get("username")
        token_type = claims.get("type")
        return {
            "success": True,
            "user_id": user_id,
            "username": username,
            "token_type": token_type
        }
    except Exception as e:
        logger.error(f"โŒ ์‚ฌ์šฉ์ž ์ •๋ณด ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        return {"success": False, "error": str(e)}
# WebSocket real-time chat endpoint
@app.websocket("/ws/{user_id}")
async def websocket_endpoint(websocket: WebSocket, user_id: str, session_id: str = None):
    """Serve one WebSocket chat connection for ``user_id``.

    After registering the connection and announcing it to other users, the
    handler loops over incoming JSON messages and dispatches on their
    ``type`` field (default ``"chat"``):

    * ``chat``: stores the message when it carries a ``session_id``, relays
      it to the session, and optionally generates, stores and relays an AI
      reply when ``generate_ai_response`` is truthy.
    * ``typing``: relays a typing notice to the session.
    * ``join_session``: moves the user between session membership sets and
      announces the join.

    Other types are only logged. On exit the connection is unregistered and a
    disconnect notice is broadcast.

    Args:
        websocket: The client connection.
        user_id: Path parameter identifying the user.
        session_id: Optional initial session; rebound by each chat message.
    """
    try:
        # Register the connection with the shared connection manager.
        await connection_manager.connect(websocket, user_id, session_id)
        # Tell everyone else that this user is now connected.
        await connection_manager.broadcast_message({
            "type": "user_connected",
            "user_id": user_id,
            "session_id": session_id,
            "timestamp": datetime.now().isoformat()
        }, exclude_user=user_id)
        # Receive loop: one JSON document per text frame.
        while True:
            try:
                data = await websocket.receive_text()
                message_data = json.loads(data)
                # Dispatch on the message type; missing type means "chat".
                message_type = message_data.get("type", "chat")
                if message_type == "chat":
                    content = message_data.get("content", "")
                    # Rebinds the handler argument to the message's session.
                    session_id = message_data.get("session_id")
                    # Persist only when the message names a session.
                    if session_id:
                        db_manager.add_chat_message(
                            session_id=session_id,
                            user_id=user_id,
                            message_type="user",
                            content=content
                        )
                    # Relay to the other users in the session.
                    await connection_manager.send_session_message({
                        "type": "chat_message",
                        "user_id": user_id,
                        "content": content,
                        "session_id": session_id,
                        "timestamp": datetime.now().isoformat()
                    }, session_id, exclude_user=user_id)
                    # Optional AI reply, requested per message.
                    if message_data.get("generate_ai_response", False):
                        ai_response = await generate_ai_response(content, user_id)
                        # Persist the AI reply under a fixed assistant user ID.
                        if session_id:
                            db_manager.add_chat_message(
                                session_id=session_id,
                                user_id="ai_assistant",
                                message_type="assistant",
                                content=ai_response
                            )
                        # Send the AI reply to the whole session (sender included).
                        await connection_manager.send_session_message({
                            "type": "ai_response",
                            "user_id": "ai_assistant",
                            "content": ai_response,
                            "session_id": session_id,
                            "timestamp": datetime.now().isoformat()
                        }, session_id)
                elif message_type == "typing":
                    # Relay the typing notice to the session named in the message.
                    await connection_manager.send_session_message({
                        "type": "user_typing",
                        "user_id": user_id,
                        "session_id": message_data.get("session_id"),
                        "timestamp": datetime.now().isoformat()
                    }, message_data.get("session_id"), exclude_user=user_id)
                elif message_type == "join_session":
                    new_session_id = message_data.get("session_id")
                    if new_session_id:
                        # Drop the user from the previously recorded session, if any.
                        if user_id in connection_manager.connection_info:
                            old_session_id = connection_manager.connection_info[user_id].get("session_id")
                            if old_session_id and old_session_id in connection_manager.session_connections:
                                connection_manager.session_connections[old_session_id].discard(user_id)
                        # Add the user to the new session, creating its set on first use.
                        if new_session_id not in connection_manager.session_connections:
                            connection_manager.session_connections[new_session_id] = set()
                        connection_manager.session_connections[new_session_id].add(user_id)
                        # Record the new session on the connection info.
                        if user_id in connection_manager.connection_info:
                            connection_manager.connection_info[user_id]["session_id"] = new_session_id
                        # Announce the join to the other session members.
                        await connection_manager.send_session_message({
                            "type": "user_joined_session",
                            "user_id": user_id,
                            "session_id": new_session_id,
                            "timestamp": datetime.now().isoformat()
                        }, new_session_id, exclude_user=user_id)
                logger.info(f"๐Ÿ“จ WebSocket ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ: {user_id} - {message_type}")
            except WebSocketDisconnect:
                # Client went away: leave the loop so cleanup runs.
                logger.info(f"๐Ÿ”Œ WebSocket ์—ฐ๊ฒฐ ๋Š๊น€: {user_id}")
                break
            except json.JSONDecodeError:
                # Malformed frame: report it to the client and keep the connection.
                logger.warning(f"โš ๏ธ ์ž˜๋ชป๋œ JSON ํ˜•์‹: {user_id}")
                await websocket.send_text(json.dumps({
                    "type": "error",
                    "message": "์ž˜๋ชป๋œ ๋ฉ”์‹œ์ง€ ํ˜•์‹์ž…๋‹ˆ๋‹ค."
                }))
            except Exception as e:
                # Any other per-message failure is reported and the loop goes on;
                # if this send itself fails, the outer handlers take over.
                logger.error(f"โŒ WebSocket ๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
                await websocket.send_text(json.dumps({
                    "type": "error",
                    "message": "๋ฉ”์‹œ์ง€ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค."
                }))
    except WebSocketDisconnect:
        logger.info(f"๐Ÿ”Œ WebSocket ์—ฐ๊ฒฐ ๋Š๊น€: {user_id}")
    except Exception as e:
        logger.error(f"โŒ WebSocket ์—”๋“œํฌ์ธํŠธ ์˜ค๋ฅ˜: {e}")
    finally:
        # Always unregister the connection and tell the others.
        connection_manager.disconnect(user_id)
        await connection_manager.broadcast_message({
            "type": "user_disconnected",
            "user_id": user_id,
            "timestamp": datetime.now().isoformat()
        }, exclude_user=user_id)
async def generate_ai_response(content: str, user_id: str) -> str:
    """Produce an AI reply for ``content`` via ``generate_sync``.

    Returns the ``response`` field of the result, or a fixed apology text when
    that field is missing or the call raises.
    """
    try:
        generation = await generate_sync(content, user_id)
        reply = generation.get("response", "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
        return reply
    except Exception as e:
        logger.error(f"โŒ AI ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
# WebSocket status endpoint
@app.get("/ws/status")
async def get_websocket_status():
    """Report the connection count, active users and known session IDs."""
    connection_count = connection_manager.get_connection_count()
    active_users = connection_manager.get_active_users()
    session_ids = list(connection_manager.session_connections.keys())
    return {
        "active_connections": connection_count,
        "active_users": active_users,
        "sessions": session_ids
    }
# Celery background task endpoints
@app.post("/tasks/document/process")
async def start_document_processing(
    user_id: str = Form(...),
    document_id: str = Form(...),
    file_path: str = Form(...),
    file_type: str = Form(...)
):
    """Queue a background document-processing task.

    Args:
        user_id: Owner of the document.
        document_id: Document identifier.
        file_path: Path handed to the task unchanged.
        file_type: File type handed to the task unchanged.

    Returns:
        A dict with the queued ``task_id`` on success, or
        ``{"success": False, "error": ...}`` when queuing raises.
    """
    # NOTE(review): file_path comes straight from the request; confirm the
    # worker restricts which paths it will read.
    try:
        queued = process_document_async.delay(user_id, document_id, file_path, file_type)
        started = {
            "success": True,
            "task_id": queued.id,
            "status": "started",
            "message": "๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
        return started
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/tasks/ai/generate")
async def start_ai_generation(
    user_id: str = Form(...),
    session_id: str = Form(...),
    prompt: str = Form(...),
    model_id: Optional[str] = Form(None)
):
    """Queue a background AI-response generation task.

    Args:
        user_id: Requesting user.
        session_id: Chat session the response belongs to.
        prompt: Prompt text.
        model_id: Optional model identifier.

    Returns:
        A dict with the queued ``task_id`` on success, or
        ``{"success": False, "error": ...}`` when queuing raises.
    """
    try:
        queued = generate_ai_response_async.delay(user_id, session_id, prompt, model_id)
        started = {
            "success": True,
            "task_id": queued.id,
            "status": "started",
            "message": "AI ์‘๋‹ต ์ƒ์„ฑ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
        return started
    except Exception as e:
        logger.error(f"โŒ AI ์‘๋‹ต ์ƒ์„ฑ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/tasks/rag/query")
async def start_rag_query(
    user_id: str = Form(...),
    query: str = Form(...),
    document_id: str = Form(...)
):
    """Queue a background RAG query task.

    Args:
        user_id: Requesting user.
        query: Question text.
        document_id: Document to query.

    Returns:
        A dict with the queued ``task_id`` on success, or
        ``{"success": False, "error": ...}`` when queuing raises.
    """
    try:
        queued = rag_query_async.delay(user_id, query, document_id)
        started = {
            "success": True,
            "task_id": queued.id,
            "status": "started",
            "message": "RAG ์ฟผ๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
        return started
    except Exception as e:
        logger.error(f"โŒ RAG ์ฟผ๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/tasks/documents/batch")
async def start_batch_processing(
    user_id: str = Form(...),
    document_ids: str = Form(...)
):
    """Queue a background batch-processing task for several documents.

    Args:
        user_id: Owner of the documents.
        document_ids: JSON-encoded array of document IDs.

    Returns:
        A dict with the queued ``task_id`` on success, or
        ``{"success": False, "error": ...}`` when the payload is not valid
        JSON, is not an array, or queuing raises.
    """
    try:
        doc_ids = json.loads(document_ids)
        # A bare JSON string or number would otherwise be queued as-is and
        # mis-counted (or crash on len()), so reject anything but an array.
        if not isinstance(doc_ids, list):
            return {
                "success": False,
                "error": "document_ids must be a JSON array of document IDs."
            }

        task = batch_process_documents_async.delay(user_id, doc_ids)
        return {
            "success": True,
            "task_id": task.id,
            "status": "started",
            "message": f"๋ฌธ์„œ ์ผ๊ด„ ์ฒ˜๋ฆฌ ์ž‘์—…์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค. ({len(doc_ids)}๊ฐœ ๋ฌธ์„œ)"
        }
    except Exception as e:
        logger.error(f"โŒ ๋ฌธ์„œ ์ผ๊ด„ ์ฒ˜๋ฆฌ ์ž‘์—… ์‹œ์ž‘ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "error": str(e)
        }
@app.get("/tasks/{task_id}")
async def get_task_status_endpoint(task_id: str):
    """Return the status, result and info recorded for a background task.

    Returns:
        A dict with ``status``, ``result`` and ``info`` on success, or
        ``{"success": False, "error": ...}`` when the lookup is empty or raises.
    """
    try:
        status = get_task_status(task_id)
        if not status:
            return {
                "success": False,
                "error": "์ž‘์—…์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
            }

        payload = {"success": True, "task_id": task_id}
        # Copy the three reported fields in their response order.
        for field in ("status", "result", "info"):
            payload[field] = status[field]
        return payload
    except Exception as e:
        logger.error(f"โŒ ์ž‘์—… ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "error": str(e)
        }
@app.delete("/tasks/{task_id}")
async def cancel_task_endpoint(task_id: str):
    """Cancel a background task.

    Returns:
        A dict with ``success`` and either a confirmation ``message`` or an
        ``error`` text.
    """
    try:
        cancelled = cancel_task(task_id)
        if not cancelled:
            return {
                "success": False,
                "error": "์ž‘์—… ์ทจ์†Œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
            }

        return {
            "success": True,
            "task_id": task_id,
            "message": "์ž‘์—…์ด ์ทจ์†Œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ ์ž‘์—… ์ทจ์†Œ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "error": str(e)
        }
# Performance monitoring endpoints
@app.post("/monitoring/start")
async def start_performance_monitoring():
    """Start the performance monitor.

    Raises:
        HTTPException: 500 when the monitor fails to start.
    """
    try:
        performance_monitor.start_monitoring()
        confirmation = {"message": "์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง์ด ์‹œ์ž‘๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
        return confirmation
    except Exception as e:
        logger.error(f"๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘ ์‹คํŒจ: {e}")
        raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘ ์‹คํŒจ: {str(e)}")
@app.post("/monitoring/stop")
async def stop_performance_monitoring():
    """Stop the performance monitor.

    Raises:
        HTTPException: 500 when the monitor fails to stop.
    """
    try:
        performance_monitor.stop_monitoring()
        confirmation = {"message": "์„ฑ๋Šฅ ๋ชจ๋‹ˆํ„ฐ๋ง์ด ์ค‘์ง€๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
        return confirmation
    except Exception as e:
        logger.error(f"๋ชจ๋‹ˆํ„ฐ๋ง ์ค‘์ง€ ์‹คํŒจ: {e}")
        raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์ค‘์ง€ ์‹คํŒจ: {str(e)}")
@app.get("/monitoring/status")
async def get_monitoring_status():
    """Return the performance monitor's summary unchanged.

    Raises:
        HTTPException: 500 when the summary cannot be produced.
    """
    try:
        return performance_monitor.get_performance_summary()
    except Exception as e:
        logger.error(f"๋ชจ๋‹ˆํ„ฐ๋ง ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
        raise HTTPException(status_code=500, detail=f"๋ชจ๋‹ˆํ„ฐ๋ง ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {str(e)}")
@app.get("/monitoring/health")
async def get_system_health():
    """Return the system health report as a plain dict.

    Raises:
        HTTPException: 500 when the health report cannot be produced.
    """
    # Attributes of the health object, in the order they appear in the response.
    reported_fields = (
        "status",
        "cpu_health",
        "memory_health",
        "disk_health",
        "network_health",
        "recommendations",
    )
    try:
        health = performance_monitor.get_system_health()
        return {field: getattr(health, field) for field in reported_fields}
    except Exception as e:
        logger.error(f"์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {e}")
        raise HTTPException(status_code=500, detail=f"์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ์กฐํšŒ ์‹คํŒจ: {str(e)}")
@app.post("/monitoring/export")
async def export_performance_metrics(file_path: str = "performance_metrics.json"):
    """Export collected performance metrics to ``file_path``.

    Raises:
        HTTPException: 500 when the export fails.
    """
    # NOTE(review): file_path is chosen by the client and passed through
    # unchanged; confirm export_metrics() restricts where it may write.
    try:
        performance_monitor.export_metrics(file_path)
        confirmation = {"message": f"์„ฑ๋Šฅ ๋ฉ”ํŠธ๋ฆญ์ด {file_path}์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."}
        return confirmation
    except Exception as e:
        logger.error(f"๋ฉ”ํŠธ๋ฆญ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {e}")
        raise HTTPException(status_code=500, detail=f"๋ฉ”ํŠธ๋ฆญ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {str(e)}")
# ============================================================================
# Image-OCR-only API endpoints (fully separate from the existing text-based system)
# ============================================================================
@app.post("/image-ocr/upload", response_model=DocumentUploadResponse)
async def upload_image_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Upload a document for image-OCR processing and storage.

    The upload is written to a temporary file in the working directory,
    handed to ``image_rag_processor``, and removed afterwards whether
    processing succeeded or not.

    Args:
        file: Uploaded file.
        user_id: Owner of the document.
        document_id: Optional ID; an 8-character UUID prefix is generated
            when it is missing.

    Returns:
        A ``DocumentUploadResponse``. Failures are reported with
        ``success=False``, never raised.
    """
    start_time = time.time()
    temp_file_path = None
    try:
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # Both values are client-supplied. Keeping only the last path component
        # stops "../" segments from moving the temp file out of the working dir.
        safe_id = os.path.basename(document_id)
        safe_name = os.path.basename(file.filename or "")
        temp_file_path = f"./temp_image_{safe_id}_{safe_name}"

        content = await file.read()
        with open(temp_file_path, "wb") as f:
            f.write(content)

        result = image_rag_processor.process_and_store_image_document(
            user_id, document_id, temp_file_path
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")
        )
    except Exception as e:
        logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id or "unknown",
            message="์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            error=str(e)
        )
    finally:
        # Remove the temp file on every path, including processing failures.
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to remove temp file {temp_file_path}: {cleanup_error}")
@app.post("/image-ocr/generate", response_model=RAGResponse)
async def generate_image_ocr_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...)
):
    """Answer ``query`` against an image-OCR document.

    Returns the processor's result with ``processing_time`` added, or a
    ``RAGResponse`` with ``success=False`` when the call raises.
    """
    start_time = time.time()
    try:
        rag_result = image_rag_processor.generate_image_rag_response(
            user_id, document_id, query
        )
        processing_time = time.time() - start_time
        # Attach the timing to the processor's own result before returning it.
        rag_result["processing_time"] = processing_time
        logger.info(f"๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")
        return rag_result
    except Exception as e:
        logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        failure = RAGResponse(
            success=False,
            response=f"์ด๋ฏธ์ง€ OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )
        return failure
@app.get("/image-ocr/document/{user_id}/{document_id}")
async def get_image_document_info(user_id: str, document_id: str):
    """Return the image-OCR processor's info for a document, unchanged.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        return image_rag_processor.get_image_document_info(user_id, document_id)
    except Exception as e:
        logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
        failure = {
            "success": False,
            "error": str(e)
        }
        return failure
@app.delete("/image-ocr/document/{user_id}/{document_id}")
async def delete_image_document(user_id: str, document_id: str):
    """Delete an image-OCR document from the vector store.

    Returns:
        A dict with ``success`` and either a confirmation ``message`` or an
        ``error`` text.
    """
    try:
        # Imported locally, as list_user_documents does, so this handler does
        # not depend on a module-level import of vector_store_manager.
        from lily_llm_core.vector_store_manager import vector_store_manager

        success = vector_store_manager.delete_document(user_id, document_id)
        if success:
            return {
                "success": True,
                "message": "์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
            }
        return {
            "success": False,
            "error": "์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์‚ญ์ œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ ์ด๋ฏธ์ง€ OCR ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "error": str(e)
        }
# ============================================================================
# LaTeX-OCR-only API endpoints (includes math formula recognition)
# ============================================================================
@app.post("/latex-ocr/upload", response_model=DocumentUploadResponse)
async def upload_latex_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Upload a document for LaTeX-OCR processing and storage.

    The upload is written to a temporary file in the working directory,
    handed to ``latex_rag_processor``, and removed afterwards whether
    processing succeeded or not.

    Args:
        file: Uploaded file.
        user_id: Owner of the document.
        document_id: Optional ID; an 8-character UUID prefix is generated
            when it is missing.

    Returns:
        A ``DocumentUploadResponse``. Failures are reported with
        ``success=False``, never raised.
    """
    start_time = time.time()
    temp_file_path = None
    try:
        if not document_id:
            import uuid
            document_id = str(uuid.uuid4())[:8]

        # Both values are client-supplied. Keeping only the last path component
        # stops "../" segments from moving the temp file out of the working dir.
        safe_id = os.path.basename(document_id)
        safe_name = os.path.basename(file.filename or "")
        temp_file_path = f"./temp_latex_{safe_id}_{safe_name}"

        content = await file.read()
        with open(temp_file_path, "wb") as f:
            f.write(content)

        result = latex_rag_processor.process_and_store_latex_document(
            user_id, document_id, temp_file_path
        )

        processing_time = time.time() - start_time
        logger.info(f"๐Ÿงฎ LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ): {file.filename}")

        return DocumentUploadResponse(
            success=result["success"],
            document_id=document_id,
            message=result.get("message", ""),
            chunks=result.get("chunks"),
            latex_count=result.get("latex_count"),
            error=result.get("error"),
            auto_response=result.get("auto_response", "")
        )
    except Exception as e:
        logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์‹คํŒจ: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id=document_id or "unknown",
            message="LaTeX-OCR ๋ฌธ์„œ ์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            error=str(e)
        )
    finally:
        # Remove the temp file on every path, including processing failures.
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(f"Failed to remove temp file {temp_file_path}: {cleanup_error}")
@app.post("/latex-ocr/generate", response_model=RAGResponse)
async def generate_latex_ocr_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...)
):
    """Answer ``query`` against a LaTeX-OCR document.

    Returns the processor's result with ``processing_time`` added, or a
    ``RAGResponse`` with ``success=False`` when the call raises.
    """
    start_time = time.time()
    try:
        rag_result = latex_rag_processor.generate_latex_rag_response(
            user_id, document_id, query
        )
        processing_time = time.time() - start_time
        # Attach the timing to the processor's own result before returning it.
        rag_result["processing_time"] = processing_time
        logger.info(f"๐Ÿงฎ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์™„๋ฃŒ ({processing_time:.2f}์ดˆ)")
        return rag_result
    except Exception as e:
        logger.error(f"โŒ LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์‹คํŒจ: {e}")
        failure = RAGResponse(
            success=False,
            response=f"LaTeX-OCR RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=time.time() - start_time
        )
        return failure
@app.get("/latex-ocr/document/{user_id}/{document_id}")
async def get_latex_document_info(user_id: str, document_id: str):
    """Return the LaTeX-OCR processor's info for a document, unchanged.

    On error, returns ``{"success": False, "error": ...}``.
    """
    try:
        return latex_rag_processor.get_latex_document_info(user_id, document_id)
    except Exception as e:
        logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์‹คํŒจ: {e}")
        failure = {
            "success": False,
            "error": str(e)
        }
        return failure
@app.delete("/latex-ocr/document/{user_id}/{document_id}")
async def delete_latex_document(user_id: str, document_id: str):
    """Delete a LaTeX-OCR document from the vector store.

    Returns:
        A dict with ``success`` and either a confirmation ``message`` or an
        ``error`` text.
    """
    try:
        # Imported locally, as list_user_documents does, so this handler does
        # not depend on a module-level import of vector_store_manager.
        from lily_llm_core.vector_store_manager import vector_store_manager

        success = vector_store_manager.delete_document(user_id, document_id)
        if success:
            return {
                "success": True,
                "message": "LaTeX-OCR ๋ฌธ์„œ๊ฐ€ ์‚ญ์ œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
            }
        return {
            "success": False,
            "error": "LaTeX-OCR ๋ฌธ์„œ ์‚ญ์ œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ LaTeX-OCR ๋ฌธ์„œ ์‚ญ์ œ ์‹คํŒจ: {e}")
        return {
            "success": False,
            "error": str(e)
        }
# ============================================================================
# LaTeX-OCR + FAISS integrated system endpoints
# ============================================================================
# # LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
# latex_ocr_faiss_simple = None
# latex_ocr_faiss_integrated = None
# def init_latex_ocr_faiss_systems():
# """LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”"""
# global latex_ocr_faiss_simple, latex_ocr_faiss_integrated
# try:
# latex_ocr_faiss_simple = LatexOCRFAISSSimple()
# latex_ocr_faiss_integrated = LatexOCRFAISSIntegrated()
# logger.info("โœ… LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
# except Exception as e:
# logger.error(f"โŒ LaTeX-OCR + FAISS ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
@app.post("/latex-ocr-faiss/process", response_model=DocumentUploadResponse)
async def process_pdf_with_latex_faiss(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    system_type: str = Form("simple")  # "simple" or "integrated"
):
    """Store an uploaded PDF and report that LaTeX-OCR + FAISS is disabled.

    The upload is written under ``uploads/latex_ocr_faiss`` and a
    ``DocumentUploadResponse`` with ``success=False`` is returned, because
    the modules that performed the extraction were removed.

    Args:
        file: Uploaded PDF.
        user_id: Identifier used as a prefix of the stored file name.
        system_type: Accepted for interface compatibility; currently unused.

    Returns:
        A ``DocumentUploadResponse`` describing the disabled feature, or one
        describing the error if storing the upload failed.
    """
    try:
        upload_dir = Path("uploads/latex_ocr_faiss")
        upload_dir.mkdir(parents=True, exist_ok=True)

        # user_id and file.filename are client-controlled. Keep only the final
        # path component so "../" segments cannot write outside upload_dir.
        safe_name = Path(f"{user_id}_{file.filename}").name
        if safe_name in ("", ".", ".."):
            raise ValueError(f"invalid upload file name: {file.filename!r}")
        file_path = upload_dir / safe_name

        with open(file_path, "wb") as f:
            content = await file.read()
            f.write(content)

        # NOTE: the LaTeX-OCR + FAISS backends (simple / integrated) were
        # removed from the project, so no extraction happens here. The old
        # result-handling code that followed this return was unreachable and
        # read an undefined `result`, so it has been dropped.
        return DocumentUploadResponse(
            success=False,
            document_id="",
            message="LaTeX-OCR + FAISS ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ๋น„ํ™œ์„ฑํ™”๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค",
            error="์‚ญ์ œ๋œ ๋ชจ๋“ˆ๋กœ ์ธํ•ด ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ"
        )
    except Exception as e:
        logger.error(f"LaTeX-OCR + FAISS ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
        return DocumentUploadResponse(
            success=False,
            document_id="",
            message="์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค",
            error=f"์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
        )
@app.post("/latex-ocr-faiss/search", response_model=RAGResponse)
async def search_latex_formulas(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_path: Optional[str] = Form(None),
    system_type: str = Form("simple"),
    k: int = Form(5)
):
    """Report that the LaTeX formula search feature is disabled.

    All parameters are accepted for interface compatibility but unused: the
    LaTeX-OCR + FAISS backends were removed, so the endpoint always answers
    with a ``RAGResponse`` whose ``success`` is ``False``.

    Returns:
        A ``RAGResponse`` describing the disabled feature, or one carrying
        the error text if building that response raised.
    """
    try:
        # NOTE: the old search/formatting code that followed this return was
        # unreachable and read an undefined `search_result`; it is removed.
        return RAGResponse(
            success=False,
            response="LaTeX-OCR + FAISS ๊ฒ€์ƒ‰ ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ๋น„ํ™œ์„ฑํ™”๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0,
            error="์‚ญ์ œ๋œ ๋ชจ๋“ˆ๋กœ ์ธํ•ด ๊ธฐ๋Šฅ์ด ๋น„ํ™œ์„ฑํ™”๋จ"
        )
    except Exception as e:
        logger.error(f"LaTeX ์ˆ˜์‹ ๊ฒ€์ƒ‰ ์˜ค๋ฅ˜: {e}")
        return RAGResponse(
            success=False,
            response="๊ฒ€์ƒ‰ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค.",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0,
            error=str(e)
        )
@app.get("/latex-ocr-faiss/status")
async def get_latex_ocr_faiss_status():
    """Report the status of the LaTeX-OCR + FAISS systems.

    Both backends are reported as not initialized and the overall status is
    the constant ``"disabled"``; nothing is probed at runtime.
    """
    try:
        # Static payload: the backends were removed, so there is no live
        # state to inspect.
        disabled_status = {
            "simple_system_initialized": False,
            "integrated_system_initialized": False,
            "status": "disabled",
            "message": "LaTeX-OCR + FAISS ๊ธฐ๋Šฅ์ด ํ˜„์žฌ ๋น„ํ™œ์„ฑํ™”๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค"
        }
        return disabled_status
    except Exception as e:
        logger.error(f"์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
        return {"status": "error", "error": str(e)}
# ============================================================================
# ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์‹œ์Šคํ…œ ์—”๋“œํฌ์ธํŠธ
# ============================================================================
@app.post("/context/set-system-prompt")
async def set_system_prompt(prompt: str = Form(...)):
    """Set the system prompt on the shared context manager.

    Returns a success payload that includes the prompt length, or a
    ``{"success": False, "error": ...}`` payload if the call raised.
    """
    try:
        context_manager.set_system_prompt(prompt)
    except Exception as e:
        logger.error(f"โŒ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ • ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
    return {
        "success": True,
        "message": "์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๊ฐ€ ์„ค์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
        "prompt_length": len(prompt)
    }
@app.post("/context/add-message")
async def add_context_message(
    role: str = Form(...),  # 'user' or 'assistant'
    content: str = Form(...),
    message_id: str = Form(None),
    metadata: str = Form("{}")  # JSON string
):
    """Append a user or assistant message to the conversation context.

    ``metadata`` is parsed as JSON (an empty string means no metadata) before
    the role is checked. Roles other than ``'user'`` and ``'assistant'``
    produce a failure payload. Exceptions, including JSON parse errors, are
    logged and reported as ``{"success": False, "error": ...}``.
    """
    try:
        import json
        metadata_dict = json.loads(metadata) if metadata else {}

        if role not in ("user", "assistant"):
            return {"success": False, "error": "์ž˜๋ชป๋œ ์—ญํ• ์ž…๋‹ˆ๋‹ค. 'user' ๋˜๋Š” 'assistant'๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”."}

        # Pick the context-manager method matching the role, then call it.
        if role == "user":
            add_message = context_manager.add_user_message
        else:
            add_message = context_manager.add_assistant_message
        stored_id = add_message(content, message_id, metadata_dict)

        return {
            "success": True,
            "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ปจํ…์ŠคํŠธ์— ์ถ”๊ฐ€๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "message_id": stored_id,
            "context_summary": context_manager.get_context_summary()
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.get("/context/get")
async def get_context(
    include_system: bool = True,
    max_length: Optional[int] = None,
    recent_turns: Optional[int] = None
):
    """Return the current conversation context.

    A truthy ``recent_turns`` selects ``get_recent_context``; otherwise
    ``get_context`` is called with ``include_system`` and ``max_length``.
    The payload also carries the context summary and memory-efficiency data.
    """
    try:
        context = (
            context_manager.get_recent_context(recent_turns)
            if recent_turns
            else context_manager.get_context(include_system, max_length)
        )
        payload = {
            "success": True,
            "context": context,
            "context_summary": context_manager.get_context_summary(),
            "memory_efficiency": context_manager.get_memory_efficiency()
        }
        return payload
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.get("/context/summary")
async def get_context_summary():
    """Return the context summary together with memory-efficiency data.

    Exceptions are logged and reported as ``{"success": False, "error": ...}``.
    """
    try:
        summary = context_manager.get_context_summary()
        efficiency = context_manager.get_memory_efficiency()
        return {
            "success": True,
            "summary": summary,
            "memory_efficiency": efficiency
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์š”์•ฝ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/context/clear")
async def clear_context():
    """Clear the conversation context held by the context manager.

    Returns a success payload, or ``{"success": False, "error": ...}`` if
    ``context_manager.clear_context`` raised.
    """
    try:
        context_manager.clear_context()
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
    return {
        "success": True,
        "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
    }
@app.delete("/context/message/{message_id}")
async def remove_context_message(message_id: str):
    """Remove one message from the conversation context.

    A falsy result from ``context_manager.remove_message`` is reported as a
    failure payload; otherwise the refreshed context summary is returned.
    """
    try:
        removed = context_manager.remove_message(message_id)
        if not removed:
            return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ œ๊ฑฐ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "context_summary": context_manager.get_context_summary()
        }
    except Exception as e:
        logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.put("/context/message/{message_id}")
async def edit_context_message(
    message_id: str,
    new_content: str = Form(...)
):
    """Replace the content of an existing context message.

    A falsy result from ``context_manager.edit_message`` is reported as a
    failure payload; otherwise the refreshed context summary is returned.
    """
    try:
        edited = context_manager.edit_message(message_id, new_content)
        if not edited:
            return {"success": False, "error": "๋ฉ”์‹œ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "๋ฉ”์‹œ์ง€๊ฐ€ ์ˆ˜์ •๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "context_summary": context_manager.get_context_summary()
        }
    except Exception as e:
        logger.error(f"โŒ ๋ฉ”์‹œ์ง€ ์ˆ˜์ • ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.get("/context/search")
async def search_context(query: str, max_results: int = 5):
    """Search the conversation context for ``query``.

    Returns the matches from ``context_manager.search_context`` together with
    the echoed query and the number of matches.
    """
    try:
        matches = context_manager.search_context(query, max_results)
        return {
            "success": True,
            "query": query,
            "results": matches,
            "total_results": len(matches)
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/context/export")
async def export_context(file_path: str = Form(None)):
    """Export the conversation context through the context manager.

    ``file_path`` is forwarded as-is to ``context_manager.export_context``;
    the path it returns is echoed back in the payload.
    """
    # NOTE(review): file_path comes straight from the request and is not
    # validated here - confirm the context manager restricts where it writes.
    try:
        written_to = context_manager.export_context(file_path)
        return {
            "success": True,
            "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๋‚ด๋ณด๋‚ด์กŒ์Šต๋‹ˆ๋‹ค.",
            "file_path": written_to
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/context/import")
async def import_context(file_path: str = Form(...)):
    """Import a conversation context through the context manager.

    A falsy result from ``context_manager.import_context`` is reported as a
    failure payload; otherwise the refreshed context summary is returned.
    """
    # NOTE(review): file_path comes straight from the request and is not
    # validated here - confirm the context manager restricts what it reads.
    try:
        imported = context_manager.import_context(file_path)
        if not imported:
            return {"success": False, "error": "์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "์ปจํ…์ŠคํŠธ๊ฐ€ ๊ฐ€์ ธ์™€์กŒ์Šต๋‹ˆ๋‹ค.",
            "context_summary": context_manager.get_context_summary()
        }
    except Exception as e:
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
# ============================================================================
# LoRA/QLoRA management system endpoints
# ============================================================================
@app.post("/lora/load-base-model")
async def load_lora_base_model(
    model_path: str = Form(...),
    model_type: str = Form("causal_lm")
):
    """Load the base model that LoRA adapters will attach to.

    Returns a failure payload right away when LoRA support is unavailable
    (``LORA_AVAILABLE`` is falsy or ``lora_manager`` is ``None``). Otherwise
    the load is delegated to ``lora_manager.load_base_model``.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        loaded = lora_manager.load_base_model(model_path, model_type)
        if not loaded:
            return {"success": False, "error": "๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "model_path": model_path,
            "device": lora_manager.device
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/create-config")
async def create_lora_config(
    r: int = Form(16),
    lora_alpha: int = Form(32),
    target_modules: str = Form("q_proj,v_proj,k_proj,o_proj,gate_proj,up_proj,down_proj"),
    lora_dropout: float = Form(0.1),
    bias: str = Form("none"),
    task_type: str = Form("CAUSAL_LM")
):
    """Create a LoRA configuration through the LoRA manager.

    Args:
        r: LoRA rank, forwarded unchanged.
        lora_alpha: LoRA alpha, forwarded unchanged.
        target_modules: Comma-separated module names. Whitespace around each
            name is stripped and empty entries are dropped; if nothing
            remains, ``None`` is forwarded.
        lora_dropout: Dropout value, forwarded unchanged.
        bias: Bias mode, forwarded unchanged.
        task_type: Task type, forwarded unchanged.

    Returns:
        A success payload with ``config.to_dict()``, or a failure payload
        when LoRA is unavailable or the manager raised.
    """
    if not LORA_AVAILABLE or lora_manager is None:
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        # A plain split(",") would turn "q_proj, v_proj" into " v_proj" and
        # keep "" for a trailing comma; normalise the names before use.
        stripped_names = [name.strip() for name in (target_modules or "").split(",")]
        target_modules_list = [name for name in stripped_names if name] or None

        config = lora_manager.create_lora_config(
            r=r,
            lora_alpha=lora_alpha,
            target_modules=target_modules_list,
            lora_dropout=lora_dropout,
            bias=bias,
            task_type=task_type
        )
        return {
            "success": True,
            "message": "LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "config": config.to_dict()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์„ค์ • ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/apply")
async def apply_lora_adapter(adapter_name: str = Form("default")):
    """Apply a LoRA adapter to the loaded model.

    Returns a failure payload right away when LoRA support is unavailable.
    On success the payload includes the adapter name and adapter stats.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        applied = lora_manager.apply_lora_to_model(adapter_name)
        if not applied:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ ์šฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "adapter_name": adapter_name,
            "stats": lora_manager.get_adapter_stats()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/load-adapter")
async def load_lora_adapter(
    adapter_path: str = Form(...),
    adapter_name: str = Form(None)
):
    """Load a previously saved LoRA adapter.

    Returns a failure payload right away when LoRA support is unavailable.
    On success the payload reports ``lora_manager.current_adapter_name`` and
    the adapter stats.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        loaded = lora_manager.load_lora_adapter(adapter_path, adapter_name)
        if not loaded:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "adapter_name": lora_manager.current_adapter_name,
            "stats": lora_manager.get_adapter_stats()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋“œ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/save-adapter")
async def save_lora_adapter(
    adapter_name: str = Form(None),
    output_dir: str = Form(None)
):
    """Save the LoRA adapter through the LoRA manager.

    Returns a failure payload right away when LoRA support is unavailable.
    On success the payload reports ``lora_manager.current_adapter_name``.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        saved = lora_manager.save_lora_adapter(adapter_name, output_dir)
        if not saved:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "adapter_name": lora_manager.current_adapter_name
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.get("/lora/adapters")
async def list_lora_adapters():
    """List the LoRA adapters reported by the LoRA manager.

    Returns a failure payload right away when LoRA support is unavailable.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        return {
            "success": True,
            "adapters": lora_manager.list_available_adapters()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ชฉ๋ก ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.get("/lora/stats")
async def get_lora_stats():
    """Return the stats of the current LoRA adapter.

    Returns a failure payload right away when LoRA support is unavailable.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        return {
            "success": True,
            "stats": lora_manager.get_adapter_stats()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ํ†ต๊ณ„ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/switch")
async def switch_lora_adapter(adapter_name: str = Form(...)):
    """Switch the active LoRA adapter to ``adapter_name``.

    Returns a failure payload right away when LoRA support is unavailable.
    On success the payload includes the adapter name and adapter stats.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        switched = lora_manager.switch_adapter(adapter_name)
        if not switched:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": f"LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ {adapter_name}์œผ๋กœ ์ „ํ™˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "adapter_name": adapter_name,
            "stats": lora_manager.get_adapter_stats()
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/unload")
async def unload_lora_adapter():
    """Unload the current LoRA adapter.

    Returns a failure payload right away when LoRA support is unavailable,
    and another when ``lora_manager.unload_adapter`` returns a falsy value.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        unloaded = lora_manager.unload_adapter()
        if not unloaded:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ์–ธ๋กœ๋“œ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/generate")
async def generate_with_lora(
    prompt: str = Form(...),
    max_length: int = Form(100),
    temperature: float = Form(0.7)
):
    """Generate text with the LoRA-adapted model.

    Returns a failure payload right away when LoRA support is unavailable.
    Otherwise ``lora_manager.generate_text`` is called and its output is
    returned together with the current adapter name.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        generated = lora_manager.generate_text(prompt, max_length, temperature)
        return {
            "success": True,
            "response": generated,
            "adapter_name": lora_manager.current_adapter_name
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
@app.post("/lora/merge")
async def merge_lora_with_base(output_path: str = Form(None)):
    """Merge the LoRA adapter into the base model.

    Returns a failure payload right away when LoRA support is unavailable.
    On success the payload reports ``output_path``, or
    ``"<base_model_path>_merged"`` when no output path was supplied.
    """
    if not (LORA_AVAILABLE and lora_manager is not None):
        return {
            "success": False,
            "error": "LoRA ๊ธฐ๋Šฅ์ด ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
        }
    try:
        merged = lora_manager.merge_lora_with_base(output_path)
        if not merged:
            return {"success": False, "error": "LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค."}
        return {
            "success": True,
            "message": "LoRA ์–ด๋Œ‘ํ„ฐ๊ฐ€ ๊ธฐ๋ณธ ๋ชจ๋ธ๊ณผ ๋ณ‘ํ•ฉ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
            "output_path": output_path or f"{lora_manager.base_model_path}_merged"
        }
    except Exception as e:
        logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
        return {"success": False, "error": str(e)}
# ============================================================================
# Multimodal RAG system endpoints
# ============================================================================
@app.post("/hybrid-rag/upload", response_model=DocumentUploadResponse)
async def upload_hybrid_document(
    file: UploadFile = File(...),
    user_id: str = Form("default_user"),
    document_id: Optional[str] = Form(None)
):
    """Store an uploaded document and run it through the hybrid RAG processor.

    Args:
        file: Uploaded document.
        user_id: Owner of the document.
        document_id: Optional identifier. When omitted it is built from the
            user id, the current Unix time and the uploaded file name.

    Returns:
        A ``DocumentUploadResponse``. On success ``chunks`` holds the number
        of sub-systems whose ``*_processing`` entry in the processor result
        reported success.
    """
    try:
        upload_dir = Path("uploads/hybrid_rag")
        upload_dir.mkdir(parents=True, exist_ok=True)

        if not document_id:
            document_id = f"{user_id}_{int(time.time())}_{file.filename}"

        # document_id may come straight from the client (or embed the client
        # supplied file name). Use only its final path component on disk so
        # "../" segments or absolute paths cannot escape upload_dir. The
        # identifier handed to the processor is left untouched.
        safe_name = Path(document_id).name
        if safe_name in ("", ".", ".."):
            raise ValueError(f"invalid document_id for upload: {document_id!r}")
        file_path = upload_dir / safe_name

        with open(file_path, "wb") as buffer:
            content = await file.read()
            buffer.write(content)

        result = hybrid_rag_processor.process_document_hybrid(str(file_path), user_id, document_id)

        if result["success"]:
            # Collect display names of every sub-system that succeeded,
            # e.g. "latex_ocr_processing" -> "Latex Ocr".
            success_systems = [
                key.replace('_processing', '').replace('_', ' ').title()
                for key, value in result.items()
                if key.endswith('_processing') and value and value.get('success', False)
            ]
            return DocumentUploadResponse(
                success=True,
                document_id=document_id,
                message=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {', '.join(success_systems)} ์‹œ์Šคํ…œ์—์„œ ์ฒ˜๋ฆฌ๋จ",
                chunks=len(success_systems)
            )

        # NOTE(review): sibling endpoints also pass document_id="" and a
        # message on failure - confirm these fields are optional on the model.
        return DocumentUploadResponse(
            success=False,
            error=result.get("error", "๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ฒ˜๋ฆฌ ์‹คํŒจ")
        )
    except Exception as e:
        logger.error(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ๋ฌธ์„œ ์—…๋กœ๋“œ ์˜ค๋ฅ˜: {e}")
        return DocumentUploadResponse(
            success=False,
            error=f"์—…๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
        )
@app.post("/hybrid-rag/generate", response_model=RAGResponse)
async def generate_hybrid_rag_response(
    query: str = Form(...),
    user_id: str = Form("default_user"),
    document_id: str = Form(...),
    use_text: bool = Form(True),
    use_image: bool = Form(True),
    use_latex: bool = Form(True),
    use_latex_ocr: bool = Form(False),  # LaTeX-OCR is disabled by default
    max_length: Optional[int] = Form(None),
    temperature: Optional[float] = Form(None),
    top_p: Optional[float] = Form(None),
    do_sample: Optional[bool] = Form(None)
):
    """Generate a hybrid (multimodal) RAG answer for ``query``.

    All arguments are forwarded positionally to
    ``hybrid_rag_processor.generate_hybrid_response``; six fields of its
    result are copied into a ``RAGResponse``. Any exception, including a
    missing result key, is logged and turned into a failed ``RAGResponse``.
    """
    try:
        outcome = hybrid_rag_processor.generate_hybrid_response(
            query, user_id, document_id,
            use_text, use_image, use_latex, use_latex_ocr,
            max_length, temperature, top_p, do_sample
        )
        # Copy exactly the fields the response model is populated with.
        copied_fields = (
            "success", "response", "context",
            "sources", "search_results", "processing_time",
        )
        return RAGResponse(**{field: outcome[field] for field in copied_fields})
    except Exception as e:
        logger.error(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‘๋‹ต ์ƒ์„ฑ ์˜ค๋ฅ˜: {e}")
        return RAGResponse(
            success=False,
            response=f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}",
            context="",
            sources=[],
            search_results=0,
            processing_time=0.0
        )
@app.get("/hybrid-rag/document/{user_id}/{document_id}")
async def get_hybrid_document_info(user_id: str, document_id: str):
    """Return hybrid RAG information about a document.

    The result of ``hybrid_rag_processor.get_document_info`` is returned
    unchanged; exceptions become ``{"success": False, "error": ...}``.
    """
    try:
        return hybrid_rag_processor.get_document_info(user_id, document_id)
    except Exception as e:
        logger.error(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ๋ฌธ์„œ ์ •๋ณด ์กฐํšŒ ์˜ค๋ฅ˜: {e}")
        failure = {"success": False, "error": str(e)}
        return failure
@app.get("/hybrid-rag/status")
async def get_hybrid_rag_status():
    """Report which hybrid RAG sub-systems are available.

    The payload is a constant: text, image and LaTeX RAG are reported as
    available, LaTeX-OCR + FAISS as unavailable. Nothing is probed at runtime.
    """
    try:
        availability = {
            "text_rag_available": True,
            "image_rag_available": True,
            "latex_rag_available": True,
            "latex_ocr_faiss_available": False,  # LaTeX-OCR is disabled
            "status": "ready"
        }
        return availability
    except Exception as e:
        logger.error(f"๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ RAG ์ƒํƒœ ํ™•์ธ ์˜ค๋ฅ˜: {e}")
        return {"status": "error", "error": str(e)}
# ============================================================================
# RAG system and advanced context manager integration API
# ============================================================================
@app.post("/rag/context-integrated/query")
async def rag_query_with_context_integration(
    user_id: str = Form(...),
    document_id: str = Form(...),
    query: str = Form(...),
    session_id: str = Form(...),
    max_results: int = Form(5),
    enable_context_integration: bool = Form(True)
):
    """Run a RAG query and optionally record the outcome in the context.

    Steps:
      1. Fail fast if ``context_manager`` is falsy.
      2. Call ``rag_processor.generate_rag_response``; the session id and
         context manager are passed only when integration is enabled.
      3. If the RAG call reports failure, return its result as-is.
      4. When integration is enabled, add a system message summarising the
         search. A failure here is logged as a warning and does not fail
         the request.

    ``max_results`` is accepted but not used by this handler.
    """
    try:
        logger.info(f"๐Ÿ” RAG + ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์ฟผ๋ฆฌ ์‹œ์ž‘: ์‚ฌ์šฉ์ž {user_id}, ๋ฌธ์„œ {document_id}, ์„ธ์…˜ {session_id}")

        if not context_manager:
            return {"status": "error", "message": "์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

        # Integration arguments are only forwarded when the feature is on.
        linked_session = session_id if enable_context_integration else None
        linked_manager = context_manager if enable_context_integration else None
        rag_result = rag_processor.generate_rag_response(
            user_id=user_id,
            document_id=document_id,
            query=query,
            session_id=linked_session,
            context_manager=linked_manager
        )
        if not rag_result["success"]:
            return rag_result

        if enable_context_integration:
            try:
                rag_summary = f"RAG ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ: {query}์— ๋Œ€ํ•œ {rag_result.get('search_results', 0)}๊ฐœ ๊ด€๋ จ ๋ฌธ์„œ ๋ฐœ๊ฒฌ"
                integration_metadata = {"session_id": session_id, "type": "rag_integration", "query": query}
                context_manager.add_system_message(rag_summary, metadata=integration_metadata)
                logger.info(f"๐Ÿ”„ RAG ๊ฒฐ๊ณผ๋ฅผ ์ปจํ…์ŠคํŠธ์— ํ†ตํ•ฉ ์™„๋ฃŒ (์„ธ์…˜: {session_id})")
            except Exception as e:
                # Best effort: the RAG answer is still returned.
                logger.warning(f"โš ๏ธ ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์‹คํŒจ: {e}")

        if enable_context_integration:
            summary = context_manager.get_context_summary(session_id)
        else:
            summary = None
        result = {
            "status": "success",
            "rag_response": rag_result,
            "context_integration": enable_context_integration,
            "session_id": session_id,
            "context_summary": summary
        }
        logger.info(f"โœ… RAG + ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์ฟผ๋ฆฌ ์™„๋ฃŒ")
        return result
    except Exception as e:
        logger.error(f"โŒ RAG + ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์ฟผ๋ฆฌ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.get("/rag/context-integrated/summary/{session_id}")
async def get_rag_context_summary(session_id: str):
    """Return the context summary plus the RAG-integration turns of a session.

    A turn counts as a RAG turn when it has truthy ``metadata`` whose
    ``'type'`` equals ``'rag_integration'``. Sessions not present in
    ``context_manager.session_conversations`` yield an empty list.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

        context_summary = context_manager.get_context_summary(session_id)

        rag_contexts = []
        if session_id in context_manager.session_conversations:
            rag_contexts = [
                {
                    "query": turn.metadata.get('query', ''),
                    "content": turn.content,
                    "timestamp": turn.timestamp
                }
                for turn in context_manager.session_conversations[session_id]
                if getattr(turn, 'metadata', None)
                and turn.metadata.get('type') == 'rag_integration'
            ]

        return {
            "status": "success",
            "session_id": session_id,
            "context_summary": context_summary,
            "rag_contexts": rag_contexts,
            "rag_context_count": len(rag_contexts)
        }
    except Exception as e:
        logger.error(f"โŒ RAG ์ปจํ…์ŠคํŠธ ์š”์•ฝ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.post("/rag/context-integrated/clear/{session_id}")
async def clear_rag_context(session_id: str):
    """Remove only the RAG-integration turns from a session's context.

    Turns whose truthy ``metadata`` has ``'type' == 'rag_integration'`` are
    collected first and then removed one by one through
    ``context_manager.remove_message``. Unknown sessions are reported as
    success with zero removed turns.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

        if session_id not in context_manager.session_conversations:
            return {
                "status": "success",
                "session_id": session_id,
                "removed_rag_turns": 0,
                "message": "์ œ๊ฑฐํ•  RAG ์ปจํ…์ŠคํŠธ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค."
            }

        # Snapshot the matching turns before removing anything, so the
        # conversation is not mutated while it is being scanned.
        rag_turns = [
            turn
            for turn in context_manager.session_conversations[session_id]
            if getattr(turn, 'metadata', None)
            and turn.metadata.get('type') == 'rag_integration'
        ]
        for turn in rag_turns:
            context_manager.remove_message(turn.message_id, session_id)

        logger.info(f"๐Ÿ—‘๏ธ RAG ์ปจํ…์ŠคํŠธ ์ •๋ฆฌ ์™„๋ฃŒ: {len(rag_turns)}๊ฐœ ํ„ด ์ œ๊ฑฐ (์„ธ์…˜: {session_id})")
        return {
            "status": "success",
            "session_id": session_id,
            "removed_rag_turns": len(rag_turns),
            "message": f"RAG ์ปจํ…์ŠคํŠธ {len(rag_turns)}๊ฐœ ํ„ด์ด ์ œ๊ฑฐ๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ RAG ์ปจํ…์ŠคํŠธ ์ •๋ฆฌ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.get("/rag/performance/stats")
async def get_rag_performance_stats():
    """Return performance stats of the RAG processor and the vector store.

    Besides the two raw stats dictionaries, an ``overall`` section holds the
    summed operation counts and the plain (unweighted) mean of the two
    success rates and of the two average times. Missing keys count as 0.
    """
    try:
        rag_stats = rag_processor.get_performance_stats()
        vector_stats = vector_store_manager.get_performance_stats()

        total_operations = rag_stats.get("total_requests", 0) + vector_stats.get("total_operations", 0)
        mean_success_rate = (rag_stats.get("success_rate", 0.0) + vector_stats.get("success_rate", 0.0)) / 2
        mean_time = (rag_stats.get("avg_processing_time", 0.0) + vector_stats.get("avg_operation_time", 0.0)) / 2

        combined_stats = {
            "rag_processor": rag_stats,
            "vector_store": vector_stats,
            "overall": {
                "total_operations": total_operations,
                "success_rate": mean_success_rate,
                "avg_processing_time": mean_time
            },
            "timestamp": time.time()
        }
        return {"status": "success", "performance_stats": combined_stats}
    except Exception as e:
        logger.error(f"โŒ RAG ์„ฑ๋Šฅ ํ†ต๊ณ„ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.post("/rag/performance/reset")
async def reset_rag_performance_stats():
    """Reset the stats of the RAG processor, then of the vector store.

    Exceptions from either reset are logged and reported as
    ``{"status": "error", "message": ...}``.
    """
    try:
        # Same order as before: processor first, vector store second.
        for component in (rag_processor, vector_store_manager):
            component.reset_stats()
        logger.info("๐Ÿ”„ RAG ์‹œ์Šคํ…œ ์„ฑ๋Šฅ ํ†ต๊ณ„ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
        return {
            "status": "success",
            "message": "RAG ์‹œ์Šคํ…œ ์„ฑ๋Šฅ ํ†ต๊ณ„๊ฐ€ ์ดˆ๊ธฐํ™”๋˜์—ˆ์Šต๋‹ˆ๋‹ค."
        }
    except Exception as e:
        logger.error(f"โŒ RAG ์„ฑ๋Šฅ ํ†ต๊ณ„ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.get("/rag/health/check")
async def rag_health_check():
    """Report the health of the RAG processor, vector store and document processor.

    The overall status is ``"healthy"`` only when the vector store's health
    check reports ``"healthy"``; otherwise it is ``"degraded"``. Any
    exception produces an ``"unhealthy"`` error payload.
    """
    try:
        rag_status = {
            "rag_processor": "healthy",
            "enable_context_integration": rag_processor.enable_context_integration,
            "max_context_length": rag_processor.max_context_length,
            "max_search_results": rag_processor.max_search_results
        }

        vector_status = vector_store_manager.health_check()

        # Optional attributes: fall back to an empty list / "no OCR".
        doc_processor_status = {
            "status": "healthy",
            "supported_formats": getattr(document_processor, 'supported_formats', []),
            "ocr_available": getattr(document_processor, 'ocr_reader', None) is not None
        }

        overall_status = "healthy" if vector_status.get("status") == "healthy" else "degraded"

        return {
            "status": "success",
            "overall_status": overall_status,
            "rag_processor": rag_status,
            "vector_store": vector_status,
            "document_processor": doc_processor_status,
            "timestamp": time.time()
        }
    except Exception as e:
        logger.error(f"โŒ RAG ์‹œ์Šคํ…œ ๊ฑด๊ฐ• ์ƒํƒœ ํ™•์ธ ์‹คํŒจ: {e}")
        return {
            "status": "error",
            "overall_status": "unhealthy",
            "error": str(e),
            "timestamp": time.time()
        }
@app.post("/rag/context-integrated/batch-process")
async def batch_process_with_context_integration(
    user_id: str = Form(...),
    session_id: str = Form(...),
    documents: List[UploadFile] = File(...),
    enable_context_integration: bool = Form(True)
):
    """Process several uploaded documents and optionally log them in the context.

    Each upload is written to a temporary file, handed to
    ``rag_processor.process_and_store_document`` and, when integration is
    enabled and processing succeeded, announced to the context manager with
    a system message. The temporary file is removed whether or not
    processing succeeded.

    Args:
        user_id: Owner of the documents.
        session_id: Session used in generated document ids and metadata.
        documents: Uploaded files.
        enable_context_integration: Whether to add a system message per
            successfully processed document.

    Returns:
        A payload with per-document results and success/error counts, or
        ``{"status": "error", "message": ...}`` if the batch itself failed.
    """
    try:
        logger.info(f"๐Ÿ“š ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ + ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์‹œ์ž‘: ์‚ฌ์šฉ์ž {user_id}, ์„ธ์…˜ {session_id}, ๋ฌธ์„œ {len(documents)}๊ฐœ")
        results = []
        for i, doc in enumerate(documents):
            # NOTE(review): the temp name embeds raw user_id/session_id and
            # drops the upload's extension - confirm both are acceptable for
            # the processor and for untrusted ids.
            temp_path = f"./temp_{user_id}_{session_id}_{i}_{int(time.time())}"
            try:
                with open(temp_path, "wb") as f:
                    f.write(doc.file.read())

                document_id = f"batch_{session_id}_{i}_{int(time.time())}"
                rag_result = rag_processor.process_and_store_document(
                    user_id=user_id,
                    document_id=document_id,
                    file_path=temp_path
                )

                if enable_context_integration and rag_result["success"]:
                    try:
                        context_manager.add_system_message(
                            f"๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {doc.filename} ({rag_result.get('chunks', 0)}๊ฐœ ์ฒญํฌ)",
                            metadata={"session_id": session_id, "type": "batch_rag", "filename": doc.filename}
                        )
                    except Exception as e:
                        # Best effort: the document itself was processed.
                        logger.warning(f"โš ๏ธ ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์‹คํŒจ: {e}")

                results.append({
                    "filename": doc.filename,
                    "document_id": document_id,
                    "rag_result": rag_result,
                    "context_integration": enable_context_integration
                })
            except Exception as e:
                logger.error(f"โŒ ๋ฌธ์„œ {doc.filename} ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
                results.append({
                    "filename": doc.filename,
                    "error": str(e),
                    "context_integration": enable_context_integration
                })
            finally:
                # Always clean up, even when processing raised; previously a
                # failure left the temp file behind.
                try:
                    os.remove(temp_path)
                except FileNotFoundError:
                    pass  # the file was never created
                except OSError as cleanup_error:
                    logger.warning(f"Failed to remove temporary file {temp_path}: {cleanup_error}")

        success_count = sum(1 for r in results if r.get("rag_result", {}).get("success", False))
        error_count = len(results) - success_count
        logger.info(f"โœ… ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์™„๋ฃŒ: {success_count}๊ฐœ ์„ฑ๊ณต, {error_count}๊ฐœ ์‹คํŒจ")
        return {
            "status": "success",
            "user_id": user_id,
            "session_id": session_id,
            "total_documents": len(documents),
            "success_count": success_count,
            "error_count": error_count,
            "results": results,
            "context_integration": enable_context_integration
        }
    except Exception as e:
        logger.error(f"โŒ ๋ฐฐ์น˜ ๋ฌธ์„œ ์ฒ˜๋ฆฌ + ์ปจํ…์ŠคํŠธ ํ†ตํ•ฉ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
@app.get("/rag/context-integrated/search-history/{session_id}")
async def get_rag_search_history(session_id: str, limit: int = 10):
    """Return the most recent RAG-related turns of a session.

    Turns are kept when their truthy ``metadata`` has a ``'type'`` of
    ``'rag_integration'``, ``'rag_context'`` or ``'batch_rag'``. They are
    sorted newest first by ``timestamp`` and cut to ``limit`` entries;
    ``total_count`` reports the number of matches before the cut.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."}

        rag_types = ('rag_integration', 'rag_context', 'batch_rag')
        search_history = []
        if session_id in context_manager.session_conversations:
            search_history = [
                {
                    "timestamp": turn.timestamp,
                    "type": turn.metadata.get('type'),
                    "query": turn.metadata.get('query', ''),
                    "filename": turn.metadata.get('filename', ''),
                    "content": turn.content
                }
                for turn in context_manager.session_conversations[session_id]
                if getattr(turn, 'metadata', None)
                and turn.metadata.get('type') in rag_types
            ]

        # Newest first, then keep only the requested number of entries.
        newest_first = sorted(search_history, key=lambda entry: entry['timestamp'], reverse=True)
        limited_history = newest_first[:limit]

        return {
            "status": "success",
            "session_id": session_id,
            "search_history": limited_history,
            "total_count": len(search_history),
            "limited_count": len(limited_history)
        }
    except Exception as e:
        logger.error(f"โŒ RAG ๊ฒ€์ƒ‰ ํžˆ์Šคํ† ๋ฆฌ ์กฐํšŒ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}
# ============================================================================
# Advanced context manager API endpoints (production use)
# ============================================================================
@app.get("/context/advanced/summary-method")
async def get_summary_method():
    """Report the active summarization method and the methods available.

    Returns:
        On success, a dict with ``current_method`` and ``available_methods``
        (the keys of the context manager's ``summary_models``).
        If the context manager is falsy or a lookup raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        active_method = manager.current_summary_method
        method_names = list(manager.summary_models.keys())
        return {
            "status": "success",
            "current_method": active_method,
            "available_methods": method_names,
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/context/advanced/summary-method")
async def set_summary_method(method: str = Form(...)):
    """Switch the context manager to the requested summarization method.

    Args:
        method: Required form field, forwarded unchanged to
            ``context_manager.set_summary_method``.

    Returns:
        On success, a dict with a confirmation ``message`` and the
        ``current_method`` read back from the context manager.
        If the context manager is falsy or the call raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        manager.set_summary_method(method)

        response = {"status": "success"}
        response["message"] = f"์š”์•ฝ ๋ฐฉ๋ฒ•์ด {method}๋กœ ๋ณ€๊ฒฝ๋˜์—ˆ์Šต๋‹ˆ๋‹ค"
        # Read the value back instead of echoing the request.
        response["current_method"] = manager.current_summary_method
        return response
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.get("/context/advanced/summary-stats/{session_id}")
async def get_advanced_summary_stats(session_id: str):
    """Return the context manager's summary statistics for one session.

    Args:
        session_id: Session identifier passed to
            ``context_manager.get_summary_stats``.

    Returns:
        On success, a dict with ``session_id`` and ``summary_stats``.
        If the context manager is falsy or the call raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        stats = manager.get_summary_stats(session_id)
        payload = {"status": "success", "session_id": session_id}
        payload["summary_stats"] = stats
        return payload
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.get("/context/advanced/compressed/{session_id}")
async def get_compressed_context(session_id: str, max_tokens: Optional[int] = None):
    """Return the compressed context for a session with a token estimate.

    Args:
        session_id: Session whose compressed context is requested.
        max_tokens: Optional value forwarded as the second argument of
            ``context_manager.get_compressed_context``.

    Returns:
        On success, a dict with ``compressed_context``, ``estimated_tokens``
        (from the manager's ``_estimate_tokens``) and ``context_length``
        (``len`` of the compressed context).
        If the context manager is falsy or a call raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        compressed = manager.get_compressed_context(session_id, max_tokens)
        token_estimate = manager._estimate_tokens(compressed)
        return {
            "status": "success",
            "session_id": session_id,
            "compressed_context": compressed,
            "estimated_tokens": token_estimate,
            "context_length": len(compressed),
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/context/advanced/force-compress/{session_id}")
async def force_compression(session_id: str):
    """Force a compression pass on a session and report the before/after stats.

    Summary statistics are read before and after calling
    ``context_manager.force_compression`` so the response can show the
    difference in ``total_summaries`` and ``total_tokens``.

    Args:
        session_id: Session to compress.

    Returns:
        On success, a dict with ``before_compression``, ``after_compression``
        and ``compression_effect``.
        If the context manager is falsy or a call raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        stats_before = manager.get_summary_stats(session_id)
        manager.force_compression(session_id)
        stats_after = manager.get_summary_stats(session_id)

        # Missing counters are treated as 0 on both sides of the difference.
        summary_delta = stats_before.get("total_summaries", 0) - stats_after.get("total_summaries", 0)
        token_delta = stats_before.get("total_tokens", 0) - stats_after.get("total_tokens", 0)

        return {
            "status": "success",
            "message": f"์„ธ์…˜ {session_id} ๊ฐ•์ œ ์••์ถ• ์™„๋ฃŒ",
            "session_id": session_id,
            "before_compression": stats_before,
            "after_compression": stats_after,
            "compression_effect": {
                "summary_reduction": summary_delta,
                "token_reduction": token_delta,
            },
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.get("/context/advanced/turn-summaries/{session_id}")
async def get_turn_summaries(session_id: str, limit: int = 10):
    """Return the most recent turn summaries stored for a session.

    Args:
        session_id: Key looked up in ``context_manager.turn_summaries``.
        limit: Number of trailing summaries to return. A value of 0 or less
            returns every stored summary.

    Returns:
        On success, a dict with ``turn_summaries`` (one plain dict per
        summary), ``total_count`` and ``limited_count``. A session with no
        stored summaries yields empty/zero values under the same keys, so
        the response shape does not depend on whether the session exists.
        If the context manager is falsy or anything raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        if not context_manager:
            return {"status": "error", "message": "Context manager not available"}

        # An unknown session is handled as an empty list so that both paths
        # share one response schema (including "limited_count").
        summaries = context_manager.turn_summaries.get(session_id, [])
        limited_summaries = summaries[-limit:] if limit > 0 else summaries

        # Convert the summary objects into JSON-serialisable dicts.
        summary_data = [
            {
                "turn_id": summary.turn_id,
                "user_message": summary.user_message,
                "assistant_message": summary.assistant_message,
                "summary": summary.summary,
                "timestamp": summary.timestamp,
                "tokens_estimated": summary.tokens_estimated,
                "key_topics": summary.key_topics
            }
            for summary in limited_summaries
        ]

        return {
            "status": "success",
            "session_id": session_id,
            "turn_summaries": summary_data,
            "total_count": len(summaries),
            "limited_count": len(limited_summaries)
        }
    except Exception as e:
        return {"status": "error", "message": str(e)}
@app.get("/context/advanced/compression-history/{session_id}")
async def get_compression_history(session_id: str):
    """Return the compression history recorded for a session.

    Args:
        session_id: Key looked up in ``context_manager.compression_history``.

    Returns:
        On success, a dict with ``compression_history`` and
        ``total_compressions`` (its length); a session with no entry yields
        an empty list and 0.
        If the context manager is falsy or anything raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        # A session without recorded history is reported as an empty list.
        entries = manager.compression_history.get(session_id, [])
        return {
            "status": "success",
            "session_id": session_id,
            "compression_history": entries,
            "total_compressions": len(entries),
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.get("/context/advanced/optimized/{session_id}")
async def get_optimized_context(session_id: str, model_name: str = "default"):
    """Return the context prepared for a given model, with summary details.

    Args:
        session_id: Session whose context is requested.
        model_name: Model identifier passed to
            ``context_manager.get_context_for_model``; defaults to
            ``"default"``.

    Returns:
        On success, a dict with ``optimized_context``, ``estimated_tokens``,
        ``context_length``, ``context_summary`` and ``summary_stats``.
        If the context manager is falsy or a call raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        context_text = manager.get_context_for_model(model_name, session_id)
        token_estimate = manager._estimate_tokens(context_text)

        # Summary information is returned alongside the context itself.
        overview = manager.get_context_summary(session_id)
        stats = manager.get_summary_stats(session_id)

        return {
            "status": "success",
            "session_id": session_id,
            "model_name": model_name,
            "optimized_context": context_text,
            "estimated_tokens": token_estimate,
            "context_length": len(context_text),
            "context_summary": overview,
            "summary_stats": stats,
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/context/advanced/export-enhanced/{session_id}")
async def export_enhanced_context(session_id: str, file_path: str = Form(None)):
    """Export a session's context through the context manager.

    Args:
        session_id: Session to export.
        file_path: Optional form field forwarded to
            ``context_manager.export_context``; ``None`` when omitted.

    Returns:
        On success, a dict with a confirmation ``message``, the
        ``file_path`` returned by the manager and the ``session_id``.
        If the manager returns a falsy path, the context manager is falsy,
        or a call raises, a dict with ``status`` ``"error"`` and a
        ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        # NOTE(review): file_path comes straight from the request; confirm
        # that export_context restricts where it is allowed to write.
        exported_path = manager.export_context(file_path, session_id)
        if not exported_path:
            return {"status": "error", "message": "๋‚ด๋ณด๋‚ด๊ธฐ ์‹คํŒจ"}

        return {
            "status": "success",
            "message": f"์„ธ์…˜ {session_id} ํ–ฅ์ƒ๋œ ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์™„๋ฃŒ",
            "file_path": exported_path,
            "session_id": session_id,
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/context/advanced/import-enhanced")
async def import_enhanced_context(file_path: str = Form(...)):
    """Import a previously exported context through the context manager.

    Args:
        file_path: Required form field forwarded to
            ``context_manager.import_context``.

    Returns:
        On success, a dict with a confirmation ``message``, the
        ``file_path`` and the context summary of the ``"default"`` session.
        If the import reports failure, the context manager is falsy, or a
        call raises, a dict with ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        # NOTE(review): file_path comes straight from the request; confirm
        # that import_context restricts which files it is allowed to read.
        imported = manager.import_context(file_path)
        if not imported:
            return {"status": "error", "message": "๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ"}

        return {
            "status": "success",
            "message": "ํ–ฅ์ƒ๋œ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์™„๋ฃŒ",
            "file_path": file_path,
            # The summary is always taken from the "default" session.
            "context_summary": manager.get_context_summary("default"),
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.get("/context/advanced/health-check")
async def advanced_context_health_check():
    """Report the configuration and per-session state of the context manager.

    Returns:
        On success, a dict with ``basic_status`` (limits and strategy),
        ``summary_status`` (summarization settings), ``cleanup_status``
        (from ``get_auto_cleanup_config``), ``session_details`` (one entry
        per session in ``session_conversations``) and a ``timestamp`` from
        ``time.time()``.
        If the context manager is falsy or anything raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        manager = context_manager
        if not manager:
            return {"status": "error", "message": "Context manager not available"}

        conversations = manager.session_conversations

        # Core limits and strategy.
        basic_status = {
            "context_manager_available": True,
            "total_sessions": len(conversations),
            "max_tokens": manager.max_tokens,
            "max_turns": manager.max_turns,
            "strategy": manager.strategy,
        }

        # Summarization settings.
        summary_status = {
            "summarization_enabled": manager.enable_summarization,
            "current_summary_method": manager.current_summary_method,
            "available_summary_methods": list(manager.summary_models.keys()),
            "summary_threshold": manager.summary_threshold,
            "max_summary_tokens": manager.max_summary_tokens,
        }

        # Automatic cleanup configuration.
        cleanup_status = manager.get_auto_cleanup_config()

        # One detail record per known session.
        session_details = {
            sid: {
                "turns": len(turns),
                "turn_summaries": len(manager.turn_summaries.get(sid, [])),
                "compression_history": len(manager.compression_history.get(sid, [])),
                "context_summary": manager.get_context_summary(sid),
                "summary_stats": manager.get_summary_stats(sid),
            }
            for sid, turns in conversations.items()
        }

        return {
            "status": "success",
            "basic_status": basic_status,
            "summary_status": summary_status,
            "cleanup_status": cleanup_status,
            "session_details": session_details,
            "timestamp": time.time(),
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
""
# ============================================================================
# User memory settings management API
# ============================================================================
@app.get("/user/memory/settings/{user_id}")
async def get_user_memory_settings(user_id: str):
    """Return a user's memory settings.

    Args:
        user_id: User whose ``keep_memory_on_room_change`` setting is read
            from ``user_memory_manager``.

    Returns:
        On success, a dict with ``user_id`` and ``settings``; a stored value
        of ``None`` is reported as ``True``.
        If the import or lookup raises, a dict with ``status`` ``"error"``
        and a ``message``.
    """
    try:
        from lily_llm_core.user_memory_manager import user_memory_manager

        stored_value = user_memory_manager.get_memory_setting(user_id, "keep_memory_on_room_change")
        # No stored value means the default applies: keep memory.
        if stored_value is None:
            stored_value = True

        return {
            "status": "success",
            "user_id": user_id,
            "settings": {"keep_memory_on_room_change": stored_value},
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/user/memory/settings/{user_id}")
async def update_user_memory_settings(
    user_id: str,
    keep_memory_on_room_change: bool = Form(True)
):
    """Store a user's ``keep_memory_on_room_change`` setting.

    Args:
        user_id: User whose setting is updated.
        keep_memory_on_room_change: Form field with the new value; defaults
            to ``True`` when omitted.

    Returns:
        On success, a dict with a confirmation ``message`` and the stored
        ``settings``.
        If the manager reports failure or anything raises, a dict with
        ``status`` ``"error"`` and a ``message``.
    """
    try:
        from lily_llm_core.user_memory_manager import user_memory_manager

        updated = user_memory_manager.update_memory_setting(
            user_id, "keep_memory_on_room_change", keep_memory_on_room_change
        )
        if not updated:
            return {"status": "error", "message": "์„ค์ • ์—…๋ฐ์ดํŠธ ์‹คํŒจ"}

        return {
            "status": "success",
            "message": f"์‚ฌ์šฉ์ž {user_id} ๋ฉ”๋ชจ๋ฆฌ ์„ค์ • ์—…๋ฐ์ดํŠธ ์™„๋ฃŒ",
            "settings": {"keep_memory_on_room_change": keep_memory_on_room_change},
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
def _room_document_uploader(doc):
    """Return the ``uploaded_by`` value of a room document, or None if absent."""
    if isinstance(doc, dict):
        return doc.get('uploaded_by')
    return getattr(doc, 'uploaded_by', None)


@app.post("/user/memory/room-change/{user_id}")
async def handle_room_change(user_id: str, new_room_id: str = Form(...)):
    """Apply the user's memory policy when they move to another room.

    If ``keep_memory_on_room_change`` is enabled, or has never been set,
    nothing is cleared. Otherwise the user's session contexts are cleared
    and the documents this user uploaded are removed from the target room's
    context.

    Args:
        user_id: User who is changing rooms.
        new_room_id: Required form field with the room being entered.

    Returns:
        On success, a dict with a ``message`` and ``memory_preserved``; the
        reset path also sets ``context_cleared``.
        If anything outside the room-context cleanup raises, a dict with
        ``status`` ``"error"`` and a ``message``. A failure inside the
        room-context cleanup is only logged as a warning.
    """
    try:
        from lily_llm_core.user_memory_manager import user_memory_manager
        from lily_llm_core.integrated_memory_manager import integrated_memory_manager

        keep_memory = user_memory_manager.get_memory_setting(user_id, "keep_memory_on_room_change")
        # An unset setting means "keep memory", the same default that
        # get_user_memory_settings reports; without this, None is falsy and
        # would wipe the memory of users who never changed the setting.
        if keep_memory is None:
            keep_memory = True

        if keep_memory:
            # Preserve memory (default behaviour).
            logger.info(f"๐Ÿ”„ ์‚ฌ์šฉ์ž {user_id}๊ฐ€ room {new_room_id}๋กœ ์ด๋™ - ๋ฉ”๋ชจ๋ฆฌ ์œ ์ง€")
            return {
                "status": "success",
                "message": f"Room {new_room_id}๋กœ ์ด๋™ - ๋ฉ”๋ชจ๋ฆฌ ์œ ์ง€๋จ",
                "memory_preserved": True
            }

        # Reset memory.
        logger.info(f"๐Ÿ”„ ์‚ฌ์šฉ์ž {user_id}๊ฐ€ room {new_room_id}๋กœ ์ด๋™ - ๋ฉ”๋ชจ๋ฆฌ ์ดˆ๊ธฐํ™”")

        # Clear every session context that belongs to this user.
        if context_manager:
            # NOTE(review): substring match, so "user_1" also matches
            # sessions of "user_10"; confirm the session-id format.
            user_sessions = [
                session_id for session_id in context_manager.session_conversations.keys()
                if f"user_{user_id}" in session_id
            ]
            for session_id in user_sessions:
                context_manager.clear_session_context(session_id)
                logger.info(f"๐Ÿ—‘๏ธ ์„ธ์…˜ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”: {session_id}")

        # Remove this user's documents from the target room context. This is
        # best effort: a failure is logged and does not fail the request.
        try:
            room_context = integrated_memory_manager.room_context_manager.get_room_context(new_room_id)
            if room_context and room_context.documents:
                original_count = len(room_context.documents)
                # Keep every document not uploaded by this user, including
                # ones with no uploader recorded. Previously, non-dict
                # documents lacking "uploaded_by" were dropped as well.
                room_context.documents = [
                    doc for doc in room_context.documents
                    if _room_document_uploader(doc) != user_id
                ]
                # Persist the change.
                integrated_memory_manager.room_context_manager.save_room_context(new_room_id, room_context)
                removed_count = original_count - len(room_context.documents)
                logger.info(f"๏ฟฝ๏ฟฝ๏ธ Room {new_room_id}์—์„œ ์‚ฌ์šฉ์ž {user_id} ๋ฌธ์„œ {removed_count}๊ฐœ ์ œ๊ฑฐ")
        except Exception as e:
            logger.warning(f"โš ๏ธ Room ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")

        return {
            "status": "success",
            "message": f"Room {new_room_id}๋กœ ์ด๋™ - ๋ฉ”๋ชจ๋ฆฌ ์ดˆ๊ธฐํ™”๋จ",
            "memory_preserved": False,
            "context_cleared": True
        }
    except Exception as e:
        logger.error(f"โŒ Room ๋ณ€๊ฒฝ ์ฒ˜๋ฆฌ ์‹คํŒจ: {e}")
        return {"status": "error", "message": str(e)}