# soyailabs / app/routes.py
# Commit ead5574 — "Improve Ollama error handling and connection checking:
# add Ollama server connection check before API calls, improve error messages
# for better debugging, add detailed error logging for Ollama communication errors."
from flask import Blueprint, render_template, request, jsonify, send_from_directory, redirect, url_for, flash
from flask_login import login_user, logout_user, login_required, current_user
from werkzeug.utils import secure_filename
from app.database import db, UploadedFile, User, ChatSession, ChatMessage, DocumentChunk, ParentChunk, SystemConfig, EpisodeAnalysis, GraphEntity, GraphRelationship, GraphEvent
from app.vector_db import get_vector_db
from app.gemini_client import get_gemini_client
import requests
import os
from datetime import datetime
import uuid
import re
import json
# Blueprint hosting every route defined in this module.
main_bp = Blueprint('main', __name__)
def admin_required(f):
    """Decorator that restricts a view to administrator accounts.

    Requires login first (via login_required); non-admin users receive a
    403 JSON payload on API paths, or a flash message plus a redirect to
    the index page for regular browser requests.
    """
    from functools import wraps

    @wraps(f)
    @login_required
    def wrapper(*args, **kwargs):
        # Administrators fall straight through to the wrapped view.
        if current_user.is_admin:
            return f(*args, **kwargs)
        # API clients get a JSON error; browser clients get flash + redirect.
        if request.path.startswith('/api/'):
            return jsonify({'error': 'κ΄€λ¦¬μž κΆŒν•œμ΄ ν•„μš”ν•©λ‹ˆλ‹€.'}), 403
        flash('κ΄€λ¦¬μž κΆŒν•œμ΄ ν•„μš”ν•©λ‹ˆλ‹€.', 'error')
        return redirect(url_for('main.index'))
    return wrapper
# Base URL of the local Ollama server (overridable via the OLLAMA_BASE_URL env var).
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
def get_model_token_limit(model_name, default_tokens=2000, token_type='output'):
    """Return the configured token limit for *model_name*.

    Backward-compatible convenience wrapper: historically only output-token
    limits existed, so the default token_type is 'output'. All real work is
    delegated to get_model_token_limit_by_type().

    Args:
        model_name: AI model name (e.g. "gemini-2.0-flash-exp",
            "gemini:gemini-2.0-flash-exp", "gemma2:9b").
        default_tokens: fallback token count when no configuration exists.
        token_type: 'input' or 'output' (default: 'output').

    Returns:
        Token count as an int.
    """
    return get_model_token_limit_by_type(
        model_name=model_name,
        default_tokens=default_tokens,
        token_type=token_type,
    )
def get_model_token_limit_by_type(model_name, default_tokens=2000, token_type='output'):
    """Look up the per-model token limit stored in SystemConfig.

    Args:
        model_name: AI model name (e.g. "gemini-2.0-flash-exp",
            "gemini:gemini-2.0-flash-exp", "gemma2:9b")
        default_tokens: fallback token count used when no setting exists
        token_type: 'input', 'output', or 'parent_chunk'

    Returns:
        Token count (int); *default_tokens* on any lookup failure.
    """
    if not model_name:
        return default_tokens
    try:
        from app.database import SystemConfig
        # Try several spellings of the model name:
        # 1. the original name as-is
        # 2. for Gemini models, the variant with the "gemini:" prefix added/removed
        # 3. Ollama model names are used unchanged
        model_name_clean = model_name.strip()
        possible_keys = [model_name_clean]
        # Gemini model-name normalization
        if model_name_clean.startswith('gemini:'):
            # "gemini:gemini-2.0-flash-exp" -> also try "gemini-2.0-flash-exp" (prefix stripped)
            possible_keys.append(model_name_clean.replace('gemini:', '', 1))
        elif model_name_clean.startswith('gemini-'):
            # "gemini-2.0-flash-exp" -> also try "gemini:gemini-2.0-flash-exp" (prefix added)
            possible_keys.append(f'gemini:{model_name_clean}')
        # Probe each candidate key in order.
        for key in possible_keys:
            # New key format: model_token_{input|output|parent_chunk}_{model_name}
            config_key = f"model_token_{token_type}_{key}"
            token_value = SystemConfig.get_config(config_key)
            if token_value:
                try:
                    token_int = int(token_value)
                    print(f"[get_model_token_limit_by_type] λͺ¨λΈ '{model_name}'의 {token_type} 토큰 수 {token_int} μ‚¬μš© (ν‚€: {config_key})")
                    return token_int
                except (ValueError, TypeError):
                    # Non-numeric config value: move on to the next candidate key.
                    continue
            # Backward compatibility: the legacy key format model_token_{model_name}
            # is treated as an output-token limit.
            if token_type == 'output':
                old_config_key = f"model_token_{key}"
                token_value = SystemConfig.get_config(old_config_key)
                if token_value:
                    try:
                        token_int = int(token_value)
                        print(f"[get_model_token_limit_by_type] λͺ¨λΈ '{model_name}'의 좜λ ₯ 토큰 수 {token_int} μ‚¬μš© (κΈ°μ‘΄ ν‚€: {old_config_key})")
                        return token_int
                    except (ValueError, TypeError):
                        continue
        # No setting found for any candidate key: fall back to the default.
        print(f"[get_model_token_limit_by_type] λͺ¨λΈ '{model_name}'의 {token_type} 토큰 수 섀정이 μ—†μ–΄ κΈ°λ³Έκ°’ {default_tokens} μ‚¬μš©")
    except Exception as e:
        # Any DB/config failure degrades to the default instead of raising.
        print(f"[get_model_token_limit_by_type] 였λ₯˜: {e}")
    return default_tokens
# Upload configuration: target folder and the set of accepted extensions.
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'uploads')
ALLOWED_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'epub'}
# Log the resolved upload path at import time (debugging aid).
print(f"[μ—…λ‘œλ“œ μ„€μ •] μ—…λ‘œλ“œ 폴더 경둜: {UPLOAD_FOLDER}")
print(f"[μ—…λ‘œλ“œ μ„€μ •] μ—…λ‘œλ“œ 폴더 쑴재 μ—¬λΆ€: {os.path.exists(UPLOAD_FOLDER)}")
def allowed_file(filename):
    """Return True when *filename* carries an extension in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
def ensure_upload_folder():
    """Create the upload folder if missing and verify it is writable.

    Raises:
        Exception: when the folder cannot be created or is not writable.
        Any failure is logged (with traceback) and re-raised to the caller.
    """
    try:
        if not os.path.exists(UPLOAD_FOLDER):
            print(f"μ—…λ‘œλ“œ 폴더 생성 쀑: {UPLOAD_FOLDER}")
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
        # Sanity check: confirm the directory actually exists after makedirs.
        if not os.path.exists(UPLOAD_FOLDER):
            raise Exception(f'μ—…λ‘œλ“œ 폴더λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€: {UPLOAD_FOLDER}')
        # Verify write permission by creating and removing a throwaway file.
        test_file = os.path.join(UPLOAD_FOLDER, '.write_test')
        try:
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"μ—…λ‘œλ“œ 폴더 μ“°κΈ° κΆŒν•œ 확인 μ™„λ£Œ: {UPLOAD_FOLDER}")
        except PermissionError as e:
            # Re-wrap with a message that names the folder for easier debugging.
            raise Exception(f'μ—…λ‘œλ“œ 폴더에 μ“°κΈ° κΆŒν•œμ΄ μ—†μŠ΅λ‹ˆλ‹€: {UPLOAD_FOLDER} - {str(e)}')
        except Exception as e:
            raise Exception(f'μ—…λ‘œλ“œ 폴더 μ“°κΈ° ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {UPLOAD_FOLDER} - {str(e)}')
    except Exception as e:
        # Log with traceback, then propagate so callers can surface the error.
        print(f"μ—…λ‘œλ“œ 폴더 생성 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def split_text_into_chunks(text, min_chunk_size=200, max_chunk_size=1000, overlap=150):
    """Semantic text chunking that respects sentence and paragraph boundaries.

    Pipeline: split into paragraphs (blank-line separated) -> split each
    paragraph into sentences (on ./!/? runs) -> pack sentences into chunks of
    at most *max_chunk_size* characters, carrying up to *overlap* characters
    of trailing sentences into the next chunk; chunks shorter than
    *min_chunk_size* are merged into a neighbor.

    Args:
        text: source text to chunk.
        min_chunk_size: minimum chunk length; smaller chunks get merged.
        max_chunk_size: soft maximum chunk length in characters.
        overlap: approximate number of overlapping characters between chunks.

    Returns:
        List of chunk strings; falls back to [text] when splitting yields
        nothing, or [] for empty input.
    """
    if not text or len(text.strip()) == 0:
        return []
    # Stage 1: split into paragraphs on blank lines.
    paragraphs = re.split(r'\n\s*\n', text.strip())
    paragraphs = [p.strip() for p in paragraphs if p.strip()]
    if not paragraphs:
        return []
    # Stage 2: split each paragraph into sentences.
    # Sentence terminators: . ! ? (works for both Korean and English text);
    # a punctuation run followed by whitespace or end-of-string ends a sentence.
    sentence_pattern = r'([.!?]+)(?=\s+|$)'
    all_sentences = []
    for para in paragraphs:
        # re.split with a capturing group keeps the punctuation as list items.
        parts = re.split(sentence_pattern, para)
        combined_sentences = []
        current_sentence = ""
        for i, part in enumerate(parts):
            if part.strip():
                if re.match(r'^[.!?]+$', part):
                    # Punctuation run: append it and close the current sentence.
                    current_sentence += part
                    if current_sentence.strip():
                        combined_sentences.append(current_sentence.strip())
                        current_sentence = ""
                else:
                    # Plain text: accumulate into the current sentence.
                    current_sentence += part
        # Trailing text without closing punctuation becomes its own sentence.
        if current_sentence.strip():
            combined_sentences.append(current_sentence.strip())
        # Paragraph with no punctuation at all: keep it whole.
        if not combined_sentences and para.strip():
            combined_sentences.append(para.strip())
        all_sentences.extend(combined_sentences)
    if not all_sentences:
        # Sentence splitting produced nothing: return the original text as-is.
        return [text] if text.strip() else []
    # Stage 3: pack sentences into chunks.
    chunks = []
    current_chunk = []
    current_size = 0
    for sentence in all_sentences:
        sentence_size = len(sentence)
        # Adding this sentence would exceed the max chunk size.
        if current_size + sentence_size > max_chunk_size and current_chunk:
            # Flush the current chunk (newline-joined to preserve line breaks).
            chunk_text = '\n'.join(current_chunk)
            if len(chunk_text.strip()) >= min_chunk_size:
                chunks.append(chunk_text)
            else:
                # Below the minimum size: merge into the previous chunk instead.
                if chunks:
                    chunks[-1] = chunks[-1] + '\n' + chunk_text
                else:
                    chunks.append(chunk_text)
            # Carry the last few sentences forward as overlap for continuity.
            overlap_sentences = []
            overlap_size = 0
            for s in reversed(current_chunk):
                if overlap_size + len(s) <= overlap:
                    overlap_sentences.insert(0, s)
                    overlap_size += len(s) + 1  # +1 accounts for the newline
                else:
                    break
            current_chunk = overlap_sentences + [sentence]
            current_size = overlap_size + sentence_size
        else:
            # Still room: append the sentence to the current chunk.
            current_chunk.append(sentence)
            current_size += sentence_size + 1  # +1 accounts for the newline
    # Flush the final chunk.
    if current_chunk:
        chunk_text = '\n'.join(current_chunk)
        if chunks and len(chunk_text.strip()) < min_chunk_size:
            # Too small on its own: merge into the previous chunk.
            chunks[-1] = chunks[-1] + '\n' + chunk_text
        else:
            chunks.append(chunk_text)
    # Final pass: drop empties and merge any remaining undersized chunks.
    final_chunks = []
    for chunk in chunks:
        chunk = chunk.strip()
        if chunk and len(chunk) >= min_chunk_size:
            final_chunks.append(chunk)
        elif chunk:
            # Undersized chunk: merge into the previous final chunk.
            if final_chunks:
                final_chunks[-1] = final_chunks[-1] + '\n' + chunk
            else:
                final_chunks.append(chunk)
    return final_chunks if final_chunks else [text] if text.strip() else []
def extract_chapter_number(text):
    """Extract a chapter number from *text*.

    Scans only the first 500 characters (chapter markers normally appear at
    the top) against a list of Korean, English, and CJK chapter patterns,
    returning the first numeric match.

    Args:
        text: text to scan for a chapter marker.

    Returns:
        Chapter number as an int, or None when no pattern matches.
    """
    # Chapter marker patterns, tried in priority order. All patterns are
    # matched case-insensitively, so the former duplicate 'CHAPTER'/'chap'
    # variants were removed (they matched exactly what 'Chapter'/'Ch.' do).
    patterns = [
        r'제\s*(\d+)\s*μž₯',     # 제1μž₯, 제 1 μž₯
        r'제\s*(\d+)\s*ν™”',     # 제1ν™”
        r'Chapter\s*(\d+)',     # Chapter 1 / CHAPTER 1 / chapter 1 (IGNORECASE)
        r'Ch\.\s*(\d+)',        # Ch. 1
        r'(\d+)\s*μž₯',          # 1μž₯
        r'(\d+)\s*ν™”',          # 1ν™”
        r'chap\.\s*(\d+)',      # chap. 1
        r'ch\s*(\d+)',          # ch 1
        r'(\d+)\s*η« ',          # 1η« 
    ]
    # Only inspect the head of the text; chapter info is usually up front.
    search_text = text[:500]
    for pattern in patterns:
        match = re.search(pattern, search_text, re.IGNORECASE)
        if match:
            try:
                return int(match.group(1))
            except ValueError:  # narrowed from a bare except: only int() can fail here
                continue
    return None
def split_content_by_episodes(content):
    """Split a raw web-novel text into sections at '#μž‘ν’ˆμ„€λͺ…' / '#Nν™”' markers.

    Returns:
        list: [(section_type, section_title, section_content, metadata), ...]
        section_type: 'μž‘ν’ˆμ„€λͺ…' or 'ν™”' ('기타' when no marker exists)
        section_title: 'μž‘ν’ˆμ„€λͺ…' or '1ν™”', '2ν™”', ...
        metadata: {'chapter': '#μž‘ν’ˆμ„€λͺ…'} or {'chapter': '1ν™”'}; {'chapter': None}
        for the no-marker fallback. Empty input yields [].
    """
    if not content or not content.strip():
        return []

    # A section header is a line starting with '#μž‘ν’ˆμ„€λͺ…' or '#<digits>ν™”'.
    header_re = re.compile(r'^#\s*(μž‘ν’ˆμ„€λͺ…|\d+ν™”)')

    sections = []
    sec_type = None
    sec_title = None
    buffered_lines = []

    def flush():
        # Persist the buffered section; skip when no header seen or body empty.
        if sec_type and buffered_lines:
            body = '\n'.join(buffered_lines).strip()
            if body:
                tag = '#μž‘ν’ˆμ„€λͺ…' if sec_type == 'μž‘ν’ˆμ„€λͺ…' else sec_title
                sections.append((sec_type, sec_title, body, {'chapter': tag}))

    for line in content.split('\n'):
        header = header_re.match(line.strip())
        if header:
            # Close out the previous section before opening a new one.
            flush()
            title = header.group(1)
            if title == 'μž‘ν’ˆμ„€λͺ…':
                sec_type, sec_title = 'μž‘ν’ˆμ„€λͺ…', 'μž‘ν’ˆμ„€λͺ…'
            else:
                sec_type, sec_title = 'ν™”', title
            buffered_lines = [line]  # keep the header line inside the section
        else:
            buffered_lines.append(line)
    # Close out the final section.
    flush()

    # No markers at all: treat the whole content as a single section.
    if not sections:
        sections.append(('기타', '전체', content.strip(), {'chapter': None}))
    return sections
def extract_metadata_with_ai(chunk_content, full_content=None, parent_chunk=None, model_name=None):
    """Extract chunk metadata with an AI model (POV, characters, time background, relationships).

    With no model_name, the default Gemini model is tried first; otherwise the
    request is routed to the named Gemini or Ollama model. This function never
    raises: on any failure it returns the empty default metadata structure.

    Args:
        chunk_content: chunk text to analyze (first 2000 chars go into the prompt)
        full_content: full source novel text, used as context for relationships
        parent_chunk: optional ParentChunk object (accepted for interface parity;
            not used in the prompt here)
        model_name: AI model name to use; None selects the Gemini default

    Returns:
        dict with keys 'pov', 'characters', 'time_background',
        'character_relationships'.
    """
    try:
        # Build an abridged view of the whole novel for relationship context.
        full_content_preview = ""
        if full_content:
            # When the full text is too long, keep only head and tail (max 20000 chars).
            if len(full_content) > 20000:
                full_content_preview = full_content[:10000] + "\n... (쀑간 μƒλž΅) ...\n" + full_content[-10000:]
            else:
                full_content_preview = full_content
        # Build the extraction prompt (JSON-only response requested).
        prompt = f"""λ‹€μŒ μ›Ήμ†Œμ„€ ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜μ—¬ μ•„λž˜ 정보λ₯Ό JSON ν˜•μ‹μœΌλ‘œλ§Œ μ‘λ‹΅ν•˜μ„Έμš”.
원본 μ›Ήμ†Œμ„€ 전체 λ‚΄μš© (참고용):
{full_content_preview[:50000] if full_content_preview else "μ—†μŒ"}
뢄석할 청크 ν…μŠ€νŠΈ:
{chunk_content[:2000]}
λ‹€μŒ ν˜•μ‹μœΌλ‘œλ§Œ μ‘λ‹΅ν•˜μ„Έμš” (JSON ν˜•μ‹):
{{
"pov": "ν™”μž/μ‹œμ μ„ μ„€λͺ…ν•˜μ„Έμš” (예: 1인칭 주인곡, 3인칭 전지적 μž‘κ°€ λ“±)",
"characters": ["λ“±μž₯인물1", "λ“±μž₯인물2"],
"time_background": "μ‹œκ°„μ  λ°°κ²½ μ„€λͺ… (예: κ³Όκ±° νšŒμƒ, ν˜„μž¬ μ‹œμ , 미래 λ“±)",
"character_relationships": [
{{
"character1": "인물1",
"character2": "인물2",
"relationship": "ν˜„μž¬ μ‹œμ μ—μ„œμ˜ 관계 μ„€λͺ… (예: 연인, 적, 친ꡬ, κ°€μ‘± λ“±)"
}}
]
}}
character_relationshipsλŠ” 이 청크에 λ“±μž₯ν•˜λŠ” 인물듀 κ°„μ˜ ν˜„μž¬ 관계λ₯Ό 원본 μ›Ήμ†Œμ„€ 전체 λ‚΄μš©μ„ μ°Έκ³ ν•˜μ—¬ νŒŒμ•…ν•œ κ²ƒμž…λ‹ˆλ‹€.
응닡은 였직 JSON ν˜•μ‹λ§Œ μ‚¬μš©ν•˜κ³ , λ‹€λ₯Έ μ„€λͺ…은 ν¬ν•¨ν•˜μ§€ λ§ˆμ„Έμš”."""
        # No explicit model: try the default Gemini model first.
        if not model_name:
            try:
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name="gemini-1.5-flash",
                        temperature=0.3,
                        max_output_tokens=get_model_token_limit(model_name or "gemini-1.5-flash", 500)  # configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        # Pull the JSON object out of the response text.
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            except Exception as e:
                # Narrowed from a bare `except: pass` so failures are logged
                # (consistent with analyze_episode); still falls back below.
                print(f"[메타데이터 μΆ”μΆœ] Gemini κΈ°λ³Έ λͺ¨λΈ 였λ₯˜: {str(e)}")
        # Explicit model requested (or the Gemini default attempt failed above).
        if model_name:
            model_name_lower = model_name.lower().strip()
            is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
            if is_gemini:
                # Strip the optional "gemini:" routing prefix for the API call.
                gemini_model_name = model_name.strip()
                if gemini_model_name.lower().startswith('gemini:'):
                    gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name=gemini_model_name,
                        temperature=0.3,
                        max_output_tokens=get_model_token_limit(model_name or "gemini-1.5-flash", 500)  # configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            else:
                # Ollama API call
                try:
                    # Use the configured input-token limit as the context window.
                    num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input')
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model_name,
                            'prompt': prompt,
                            'stream': False,
                            'options': {
                                'temperature': 0.3,
                                'num_predict': get_model_token_limit(model_name, 500),  # configured token limit
                                'num_ctx': num_ctx
                            }
                        },
                        timeout=120  # 2-minute timeout
                    )
                    if ollama_response.status_code == 200:
                        response_data = ollama_response.json()
                        response_text = response_data.get('response', '').strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
                    else:
                        # Non-200 responses were previously swallowed silently.
                        print(f"[메타데이터 μΆ”μΆœ] Ollama 응닡 였λ₯˜: HTTP {ollama_response.status_code}")
                except requests.exceptions.Timeout:
                    # Detailed handlers replace the former bare `except: pass`,
                    # mirroring the error handling in analyze_episode.
                    print("[메타데이터 μΆ”μΆœ] Ollama νƒ€μž„μ•„μ›ƒ: μš”μ²­ μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (2λΆ„)")
                except requests.exceptions.ConnectionError:
                    print("[메타데이터 μΆ”μΆœ] Ollama μ—°κ²° 였λ₯˜: Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
                except Exception as e:
                    print(f"[메타데이터 μΆ”μΆœ] Ollama 였λ₯˜: {str(e)}")
        # Fallback: AI extraction failed — return the empty default structure.
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
    except Exception as e:
        print(f"[메타데이터 μΆ”μΆœ] 였λ₯˜: {str(e)}")
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
def extract_chunk_metadata(chunk_content, full_content=None, chunk_index=None, file_id=None, model_name=None):
    """Extract a chunk's metadata (POV, characters, time background, relationships).

    Looks up the file's ParentChunk (best-effort) and delegates the actual
    extraction to extract_metadata_with_ai(); always returns a complete
    metadata dict even when extraction fails.

    Args:
        chunk_content: chunk text to analyze
        full_content: full source novel text, used as context for relationships
        chunk_index: chunk index (accepted for interface parity; unused here)
        file_id: file ID used to look up the ParentChunk
        model_name: AI model name to use

    Returns:
        dict with keys 'pov', 'characters', 'time_background',
        'character_relationships'.
    """
    metadata = {
        "pov": None,
        "characters": [],
        "time_background": None,
        "character_relationships": []
    }
    # Best-effort ParentChunk lookup for additional context.
    parent_chunk = None
    if file_id:
        try:
            parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        except Exception as e:
            # Narrowed from a bare `except: pass`: still best-effort, but the
            # failure is now logged instead of silently discarded.
            print(f"[메타데이터 μΆ”μΆœ] Parent Chunk 쑰회 μ‹€νŒ¨ (file_id={file_id}): {str(e)}")
    # Run AI extraction with the full novel as reference context.
    ai_metadata = extract_metadata_with_ai(chunk_content, full_content, parent_chunk, model_name)
    if ai_metadata:
        metadata["pov"] = ai_metadata.get("pov")
        metadata["characters"] = ai_metadata.get("characters", [])
        metadata["time_background"] = ai_metadata.get("time_background")
        metadata["character_relationships"] = ai_metadata.get("character_relationships", [])
    return metadata
def analyze_episode(episode_content, episode_title, full_content=None, parent_chunk=None, model_name=None):
    """Per-episode analysis (main story, characters, relationship changes, etc.).

    Args:
        episode_content: episode text to analyze (truncated to 10000 chars in the prompt)
        episode_title: episode title (e.g. '1ν™”', '2ν™”')
        full_content: full source novel text, used as reference context
        parent_chunk: optional ParentChunk object with work-level info
        model_name: AI model name to use; None selects the Gemini default

    Returns:
        Analysis result text (one continuous text block). Never raises: on
        failure a short Korean fallback message is returned instead.
    """
    try:
        # Build an abridged view of the whole novel for reference.
        full_content_preview = ""
        if full_content:
            # When the full text is too long, keep only head and tail (max 30000 chars).
            if len(full_content) > 30000:
                full_content_preview = full_content[:15000] + "\n... (쀑간 μƒλž΅) ...\n" + full_content[-15000:]
            else:
                full_content_preview = full_content
        # Append work-level ParentChunk info when available.
        parent_info = ""
        if parent_chunk:
            parent_info = f"""
μž‘ν’ˆ 전체 정보:
- 세계관: {parent_chunk.world_view or 'μ—†μŒ'}
- μ£Όμš” 캐릭터: {parent_chunk.characters or 'μ—†μŒ'}
- μ£Όμš” μŠ€ν† λ¦¬: {parent_chunk.story or 'μ—†μŒ'}
"""
        # Build the analysis prompt (fixed section headings, single text block).
        prompt = f"""λ‹€μŒ μ›Ήμ†Œμ„€μ˜ {episode_title} 회차λ₯Ό λΆ„μ„ν•˜μ—¬ μ•„λž˜ ν•­λͺ©λ“€μ„ ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ΄μ–΄μ„œ μž‘μ„±ν•΄μ£Όμ„Έμš”.
{parent_info}
원본 μ›Ήμ†Œμ„€ 전체 λ‚΄μš© (참고용):
{full_content_preview[:50000] if full_content_preview else "μ—†μŒ"}
뢄석할 회차 λ‚΄μš© ({episode_title}):
{episode_content[:10000] if len(episode_content) > 10000 else episode_content}
λ‹€μŒ ν˜•μ‹μœΌλ‘œ 뢄석 κ²°κ³Όλ₯Ό μž‘μ„±ν•΄μ£Όμ„Έμš” (ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ΄μ–΄μ„œ μž‘μ„±):
## {episode_title} μ£Όμš” μŠ€ν† λ¦¬ 뢄석
[이 νšŒμ°¨μ—μ„œ μΌμ–΄λ‚œ μ£Όμš” 사건과 μŠ€ν† λ¦¬ μ „κ°œλ₯Ό μƒμ„Ένžˆ λΆ„μ„ν•΄μ£Όμ„Έμš”]
## {episode_title} μ£Όμš” λ“±μž₯ 인물 뢄석
[이 νšŒμ°¨μ— λ“±μž₯ν•œ μ£Όμš” 인물듀과 κ·Έλ“€μ˜ μ—­ν• , 행동, νŠΉμ§•μ„ λΆ„μ„ν•΄μ£Όμ„Έμš”]
## 인물과 μΈλ¬Όκ°„μ˜ 관계 λ³€ν™”
[이 νšŒμ°¨μ—μ„œ 인물듀 κ°„μ˜ 관계가 μ–΄λ–»κ²Œ λ³€ν™”ν–ˆλŠ”μ§€, μƒˆλ‘œμš΄ 관계가 ν˜•μ„±λ˜μ—ˆλŠ”μ§€ 등을 λΆ„μ„ν•΄μ£Όμ„Έμš”]
## {episode_title} 인물 μ™Έλͺ¨ 뢄석
[이 νšŒμ°¨μ— λ“±μž₯ν•œ μΈλ¬Όλ“€μ˜ μ™Έλͺ¨, μ²΄ν˜•, μ–Όκ΅΄ νŠΉμ§•, 신체적 νŠΉμ§• 등을 μƒμ„Ένžˆ λΆ„μ„ν•΄μ£Όμ„Έμš”. 특히 μƒˆλ‘œ λ“±μž₯ν•œ μΈλ¬Όμ΄λ‚˜ μ™Έλͺ¨κ°€ λ³€κ²½λœ 인물에 λŒ€ν•΄ μžμ„Ένžˆ μ„€λͺ…ν•΄μ£Όμ„Έμš”]
## {episode_title} 인물 의볡 뢄석
[이 νšŒμ°¨μ— λ“±μž₯ν•œ 인물듀이 μ°©μš©ν•œ 의볡, 볡μž₯, μ•‘μ„Έμ„œλ¦¬ 등을 μƒμ„Ένžˆ λΆ„μ„ν•΄μ£Όμ„Έμš”. 의볡의 μŠ€νƒ€μΌ, 색상, νŠΉμ§•, 상황에 λ§žλŠ” 볡μž₯인지 등을 λΆ„μ„ν•΄μ£Όμ„Έμš”]
## {episode_title} λ°°κ²½ 뢄석
[이 회차의 배경이 λ˜λŠ” μž₯μ†Œ, ν™˜κ²½, μ‹œκ°„λŒ€, λΆ„μœ„κΈ° 등을 μƒμ„Ένžˆ λΆ„μ„ν•΄μ£Όμ„Έμš”. μž₯μ†Œμ˜ νŠΉμ§•, λΆ„μœ„κΈ°, μ‹œκ°„μ  λ°°κ²½, 날씨, κ³„μ ˆ 등을 ν¬ν•¨ν•˜μ—¬ λΆ„μ„ν•΄μ£Όμ„Έμš”]
## 기타
[이 회차의 νŠΉλ³„ν•œ 점, μ€‘μš” 사건, λ–‘λ°₯, 볡선 λ“± 기타 μ€‘μš”ν•œ λ‚΄μš©μ„ λΆ„μ„ν•΄μ£Όμ„Έμš”]
응닡은 μœ„ ν˜•μ‹μ„ κ·ΈλŒ€λ‘œ μœ μ§€ν•˜λ©΄μ„œ 각 ν•­λͺ©μ— λŒ€ν•œ μƒμ„Έν•œ 뢄석 λ‚΄μš©μ„ μž‘μ„±ν•΄μ£Όμ„Έμš”."""
        # No explicit model: try the default Gemini model first.
        if not model_name:
            try:
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name="gemini-1.5-flash",
                        temperature=0.5,
                        max_output_tokens=get_model_token_limit("gemini-1.5-flash", 3000)  # configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        return result['response'].strip()
            except Exception as e:
                print(f"[회차 뢄석] Gemini κΈ°λ³Έ λͺ¨λΈ 였λ₯˜: {str(e)}")
        # Explicit model requested (or the Gemini default attempt failed above).
        if model_name:
            model_name_lower = model_name.lower().strip()
            is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
            if is_gemini:
                # Strip the optional "gemini:" routing prefix for the API call.
                gemini_model_name = model_name.strip()
                if gemini_model_name.lower().startswith('gemini:'):
                    gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name=gemini_model_name,
                        temperature=0.5,
                        max_output_tokens=get_model_token_limit(model_name, 3000)  # configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        return result['response'].strip()
            else:
                # Ollama API call
                try:
                    # Use the configured input-token limit as the context window.
                    num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input')
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model_name,
                            'prompt': prompt,
                            'stream': False,
                            'options': {
                                'temperature': 0.5,
                                'num_predict': get_model_token_limit(model_name, 3000),  # configured token limit
                                'num_ctx': num_ctx
                            }
                        },
                        timeout=300  # 5-minute timeout: episode analysis can be slow
                    )
                    if ollama_response.status_code == 200:
                        response_data = ollama_response.json()
                        return response_data.get('response', '').strip()
                except requests.exceptions.Timeout:
                    print(f"[회차 뢄석] Ollama νƒ€μž„μ•„μ›ƒ: μš”μ²­ μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (5λΆ„)")
                    print(f"[회차 뢄석] 회차 λ‚΄μš©μ΄ λ„ˆλ¬΄ κΈΈκ±°λ‚˜ λͺ¨λΈ 응닡이 느릴 수 μžˆμŠ΅λ‹ˆλ‹€.")
                except requests.exceptions.ConnectionError:
                    print(f"[회차 뢄석] Ollama μ—°κ²° 였λ₯˜: Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
                except Exception as e:
                    print(f"[회차 뢄석] Ollama 였λ₯˜: {str(e)}")
        # All backends failed: return a short fallback analysis text.
        return f"## {episode_title} 뢄석\n뢄석을 μ™„λ£Œν•  수 μ—†μ—ˆμŠ΅λ‹ˆλ‹€."
    except Exception as e:
        print(f"[회차 뢄석] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return f"## {episode_title} 뢄석\n뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}"
def extract_graph_from_episode(episode_content, episode_title, file_id, full_content=None, parent_chunk=None, model_name=None):
    """Per-episode graph extraction (entities and relationships) into the DB.

    Prompts an AI model for a JSON graph of the episode, then replaces any
    previously stored GraphEntity/GraphRelationship/GraphEvent rows for this
    (file_id, episode_title) pair with the newly extracted data.

    Args:
        episode_content: episode text to analyze
        episode_title: episode title (e.g. '1ν™”', '2ν™”')
        file_id: file ID that the graph rows are keyed on
        full_content: full source novel text, used as reference context
        parent_chunk: optional ParentChunk object with work-level info
        model_name: AI model name to use; None selects the Gemini default

    Returns:
        bool: True when extraction succeeded and rows were saved, else False.
    """
    try:
        print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ‹œμž‘...")
        # Append work-level ParentChunk info when available.
        parent_info = ""
        if parent_chunk:
            parent_info = f"""
μž‘ν’ˆ 전체 정보:
- 세계관: {parent_chunk.world_view or 'μ—†μŒ'}
- μ£Όμš” 캐릭터: {parent_chunk.characters or 'μ—†μŒ'}
- μ£Όμš” μŠ€ν† λ¦¬: {parent_chunk.story or 'μ—†μŒ'}
"""
        # Build the graph-extraction prompt from the shared prompt template.
        from app.prompts.graph_extraction import get_graph_extraction_prompt
        prompt = get_graph_extraction_prompt(
            episode_content=episode_content,
            episode_title=episode_title,
            full_content=full_content,
            parent_chunk_info=parent_info,
            max_length=10000
        )
        # No explicit model: try the default Gemini model first.
        response_text = None
        if not model_name:
            try:
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name="gemini-1.5-flash",
                        temperature=0.3,
                        max_output_tokens=3000
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
            except Exception as e:
                print(f"[Graph Extraction] Gemini κΈ°λ³Έ λͺ¨λΈ 였λ₯˜: {str(e)}")
        # Explicit model requested (or the Gemini default attempt failed above).
        if not response_text and model_name:
            model_name_lower = model_name.lower().strip()
            is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
            if is_gemini:
                # Strip the optional "gemini:" routing prefix for the API call.
                gemini_model_name = model_name.strip()
                if gemini_model_name.lower().startswith('gemini:'):
                    gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name=gemini_model_name,
                        temperature=0.3,
                        max_output_tokens=3000
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
            else:
                # Ollama API call
                try:
                    # Use the configured input-token limit as the context window.
                    num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input')
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model_name,
                            'prompt': prompt,
                            'stream': False,
                            'options': {
                                'temperature': 0.3,
                                'num_predict': 3000,
                                'num_ctx': num_ctx
                            }
                        },
                        timeout=300  # 5-minute timeout
                    )
                    if ollama_response.status_code == 200:
                        response_data = ollama_response.json()
                        response_text = response_data.get('response', '').strip()
                except requests.exceptions.Timeout:
                    print(f"[Graph Extraction] Ollama νƒ€μž„μ•„μ›ƒ: μš”μ²­ μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (5λΆ„)")
                except requests.exceptions.ConnectionError:
                    print(f"[Graph Extraction] Ollama μ—°κ²° 였λ₯˜: Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
                except Exception as e:
                    print(f"[Graph Extraction] Ollama 였λ₯˜: {str(e)}")
        if not response_text:
            print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ‹€νŒ¨: 응닡 μ—†μŒ")
            return False
        # Extract the JSON object from the model's response text.
        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
        if not json_match:
            print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ‹€νŒ¨: JSON ν˜•μ‹μ΄ μ•„λ‹™λ‹ˆλ‹€")
            print(f"[Graph Extraction] 응닡 일뢀: {response_text[:500]}")
            return False
        try:
            graph_data = json.loads(json_match.group(0))
        except json.JSONDecodeError as e:
            print(f"[Graph Extraction] '{episode_title}' JSON νŒŒμ‹± 였λ₯˜: {str(e)}")
            print(f"[Graph Extraction] 응닡 일뢀: {response_text[:500]}")
            return False
        # Delete any previously extracted graph data for this episode.
        GraphEntity.query.filter_by(file_id=file_id, episode_title=episode_title).delete()
        GraphRelationship.query.filter_by(file_id=file_id, episode_title=episode_title).delete()
        GraphEvent.query.filter_by(file_id=file_id, episode_title=episode_title).delete()
        db.session.commit()
        # Persist the extracted graph to the database.
        saved_count = 0
        entities = graph_data.get('entities', {})
        # Character entities
        characters = entities.get('characters', [])
        for char in characters:
            if char.get('name'):
                entity = GraphEntity(
                    file_id=file_id,
                    episode_title=episode_title,
                    entity_name=char.get('name', ''),
                    entity_type='character',
                    description=char.get('description'),
                    role=char.get('role'),
                    category=None
                )
                db.session.add(entity)
                saved_count += 1
        # Location entities
        locations = entities.get('locations', [])
        for loc in locations:
            if loc.get('name'):
                entity = GraphEntity(
                    file_id=file_id,
                    episode_title=episode_title,
                    entity_name=loc.get('name', ''),
                    entity_type='location',
                    description=loc.get('description'),
                    role=None,
                    category=loc.get('category')
                )
                db.session.add(entity)
                saved_count += 1
        # Relationships (require both endpoints)
        relationships = graph_data.get('relationships', [])
        for rel in relationships:
            if rel.get('source') and rel.get('target'):
                relationship = GraphRelationship(
                    file_id=file_id,
                    episode_title=episode_title,
                    source=rel.get('source', ''),
                    target=rel.get('target', ''),
                    relationship_type=rel.get('type', ''),
                    description=rel.get('description'),
                    event=rel.get('event')
                )
                db.session.add(relationship)
                saved_count += 1
        # Events (require a name or a description)
        events = graph_data.get('events', [])
        for event in events:
            if event.get('name') or event.get('description'):
                # Participants are stored as a JSON-encoded list (or NULL).
                participants = event.get('participants', [])
                participants_json = json.dumps(participants, ensure_ascii=False) if participants else None
                graph_event = GraphEvent(
                    file_id=file_id,
                    episode_title=episode_title,
                    event_name=event.get('name', ''),
                    description=event.get('description', ''),
                    participants=participants_json,
                    location=event.get('location'),
                    significance=event.get('significance')
                )
                db.session.add(graph_event)
                saved_count += 1
        db.session.commit()
        print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ™„λ£Œ: {saved_count}개 ν•­λͺ© μ €μž₯")
        return True
    except Exception as e:
        # Roll back any partial writes before reporting failure.
        print(f"[Graph Extraction] '{episode_title}' Graph Extraction 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        db.session.rollback()
        return False
def create_chunks_for_file(file_id, content, skip_episode_analysis=False, skip_graph_extraction=False):
"""파일 λ‚΄μš©μ„ μ„Ήμ…˜λ³„λ‘œ λΆ„ν• ν•˜μ—¬ 의미 기반 청크둜 μ €μž₯ (벑터 DB 포함)
μ„Ήμ…˜ λΆ„ν•  κ·œμΉ™:
- #μž‘ν’ˆμ„€λͺ…λΆ€ν„° #1ν™”κΉŒμ§€: 'μž‘ν’ˆμ„€λͺ…' μ„Ήμ…˜, 메타데이터에 #μž‘ν’ˆμ„€λͺ… μΆ”κ°€
- #nν™”λΆ€ν„° #n+1ν™”κΉŒμ§€: 'nν™”' μ„Ήμ…˜, 메타데이터에 회차 정보(nν™”) μΆ”κ°€
Args:
file_id: 파일 ID
content: 파일 λ‚΄μš©
skip_episode_analysis: 회차 뢄석 κ±΄λ„ˆλ›°κΈ° (κΈ°λ³Έκ°’: False)
skip_graph_extraction: Graph Extraction κ±΄λ„ˆλ›°κΈ° (κΈ°λ³Έκ°’: False)
"""
try:
print(f"[청크 생성] 파일 ID {file_id}에 λŒ€ν•œ 청크 생성 μ‹œμž‘")
print(f"[청크 생성] 원본 ν…μŠ€νŠΈ 길이: {len(content)}자")
# 파일 정보 κ°€μ Έμ˜€κΈ° (λͺ¨λΈλͺ… λ“±)
uploaded_file = UploadedFile.query.get(file_id)
model_name = uploaded_file.model_name if uploaded_file else None
# 벑터 DB λ§€λ‹ˆμ € κ°€μ Έμ˜€κΈ°
vector_db = get_vector_db()
# κΈ°μ‘΄ 청크 μ‚­μ œ (DB + 벑터 DB)
existing_chunks = DocumentChunk.query.filter_by(file_id=file_id).all()
if existing_chunks:
print(f"[청크 생성] κΈ°μ‘΄ 청크 {len(existing_chunks)}개 μ‚­μ œ 쀑...")
# 벑터 DBμ—μ„œ μ‚­μ œ
vector_db.delete_chunks_by_file_id(file_id)
# DBμ—μ„œ μ‚­μ œ
DocumentChunk.query.filter_by(file_id=file_id).delete()
db.session.commit()
# 원본 μ›Ήμ†Œμ„€μ„ μ„Ήμ…˜λ³„λ‘œ λΆ„ν•  (#μž‘ν’ˆμ„€λͺ…, #1ν™”, #2ν™” λ“±)
sections = split_content_by_episodes(content)
print(f"[청크 생성] μ„Ήμ…˜ λΆ„ν•  μ™„λ£Œ: {len(sections)}개 μ„Ήμ…˜")
for i, (section_type, section_title, section_content, section_metadata) in enumerate(sections):
print(f"[청크 생성] μ„Ήμ…˜ {i+1}: {section_title} ({len(section_content)}자)")
if len(sections) == 0:
print(f"[청크 생성] κ²½κ³ : μ„Ήμ…˜μ΄ μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
return 0
# κΈ°μ‘΄ 회차 뢄석 μ‚­μ œ
existing_analyses = EpisodeAnalysis.query.filter_by(file_id=file_id).all()
if existing_analyses:
print(f"[회차 뢄석] κΈ°μ‘΄ 회차 뢄석 {len(existing_analyses)}개 μ‚­μ œ 쀑...")
for analysis in existing_analyses:
db.session.delete(analysis)
db.session.commit()
# '#μž‘ν’ˆμ„€λͺ…'을 μ œμ™Έν•œ 각 회차 뢄석
episode_sections = [s for s in sections if s[0] != 'μž‘ν’ˆμ„€λͺ…'] # section_type이 'μž‘ν’ˆμ„€λͺ…'이 μ•„λ‹Œ κ²ƒλ§Œ
if episode_sections and model_name and not skip_episode_analysis:
print(f"[회차 뢄석] {len(episode_sections)}개 회차 뢄석 μ‹œμž‘...")
# Parent Chunk κ°€μ Έμ˜€κΈ°
parent_chunk = None
try:
parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
except:
pass
# 각 회차 뢄석 κ²°κ³Όλ₯Ό ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ΄μ–΄μ„œ μ €μž₯
all_analyses = []
for section_type, section_title, section_content, section_metadata in episode_sections:
try:
print(f"[회차 뢄석] '{section_title}' 뢄석 쀑...")
analysis_result = analyze_episode(
episode_content=section_content,
episode_title=section_title,
full_content=content,
parent_chunk=parent_chunk,
model_name=model_name
)
if analysis_result:
all_analyses.append(f"\n\n{analysis_result}")
print(f"[회차 뢄석] '{section_title}' 뢄석 μ™„λ£Œ")
else:
print(f"[회차 뢄석] '{section_title}' 뢄석 μ‹€νŒ¨ (κ²°κ³Ό μ—†μŒ)")
except Exception as e:
print(f"[회차 뢄석] '{section_title}' 뢄석 쀑 였λ₯˜: {str(e)}")
import traceback
traceback.print_exc()
continue
# λͺ¨λ“  회차 뢄석 κ²°κ³Όλ₯Ό ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ΄μ–΄μ„œ μ €μž₯
if all_analyses:
combined_analysis = "\n".join(all_analyses).strip()
# ν•˜λ‚˜μ˜ 톡합 λΆ„μ„μœΌλ‘œ μ €μž₯ (λ‚˜λˆ μ„œ μ €μž₯ν•˜μ§€ μ•Šκ³  ν•˜λ‚˜μ— μ΄μ–΄μ„œ μ €μž₯)
episode_analysis = EpisodeAnalysis(
file_id=file_id,
episode_title="전체 회차 톡합 뢄석",
analysis_content=combined_analysis # λͺ¨λ“  회차 뢄석을 ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ €μž₯
)
db.session.add(episode_analysis)
db.session.commit()
print(f"[회차 뢄석] μ™„λ£Œ: {len(episode_sections)}개 회차 뢄석 κ²°κ³Όλ₯Ό ν•˜λ‚˜μ˜ ν…μŠ€νŠΈλ‘œ μ €μž₯")
else:
print(f"[회차 뢄석] κ²½κ³ : 뢄석 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
# νšŒμ°¨λ³„ Graph Extraction μ‹€ν–‰ (회차 뢄석 성곡 여뢀와 관계없이 μ‹€ν–‰)
if episode_sections and model_name and not skip_graph_extraction:
print(f"[Graph Extraction] νšŒμ°¨λ³„ Graph Extraction μ‹œμž‘...")
# Parent Chunk κ°€μ Έμ˜€κΈ° (회차 뢄석 블둝 λ°–μ—μ„œλ„ μ‚¬μš© κ°€λŠ₯ν•˜λ„λ‘)
parent_chunk = None
try:
parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
except:
pass
graph_extraction_success_count = 0
for section_type, section_title, section_content, section_metadata in episode_sections:
try:
print(f"[Graph Extraction] '{section_title}' Graph Extraction 쀑...")
success = extract_graph_from_episode(
episode_content=section_content,
episode_title=section_title,
file_id=file_id,
full_content=content,
parent_chunk=parent_chunk,
model_name=model_name
)
if success:
graph_extraction_success_count += 1
print(f"[Graph Extraction] '{section_title}' Graph Extraction μ™„λ£Œ")
else:
print(f"[Graph Extraction] '{section_title}' Graph Extraction μ‹€νŒ¨")
except Exception as e:
print(f"[Graph Extraction] '{section_title}' Graph Extraction 쀑 였λ₯˜: {str(e)}")
import traceback
traceback.print_exc()
continue
print(f"[Graph Extraction] μ™„λ£Œ: {graph_extraction_success_count}/{len(episode_sections)}개 회차 Graph Extraction 성곡")
else:
if not model_name:
print(f"[회차 뢄석] λͺ¨λΈλͺ…이 μ—†μ–΄ 회차 뢄석을 κ±΄λ„ˆλœλ‹ˆλ‹€.")
elif not episode_sections:
print(f"[회차 뢄석] 뢄석할 νšŒμ°¨κ°€ μ—†μŠ΅λ‹ˆλ‹€.")
# 각 μ„Ήμ…˜λ³„λ‘œ 청크 생성 및 μ €μž₯
saved_count = 0
vector_saved_count = 0
global_chunk_index = 0 # 전체 청크 인덱슀
for section_idx, (section_type, section_title, section_content, section_metadata) in enumerate(sections):
print(f"[청크 생성] μ„Ήμ…˜ '{section_title}' 처리 쀑... ({len(section_content)}자)")
# 각 μ„Ήμ…˜μ„ 의미 기반 μ²­ν‚Ή (λ¬Έμž₯κ³Ό 문단 경계λ₯Ό κ³ λ €ν•˜μ—¬ λΆ„ν• )
# min_chunk_size: μ΅œμ†Œ 200자, max_chunk_size: μ΅œλŒ€ 1000자, overlap: 150자
section_chunks = split_text_into_chunks(section_content, min_chunk_size=200, max_chunk_size=1000, overlap=150)
print(f"[청크 생성] μ„Ήμ…˜ '{section_title}' λΆ„ν• λœ 청크 수: {len(section_chunks)}개")
# 각 청크λ₯Ό λ°μ΄ν„°λ² μ΄μŠ€μ™€ 벑터 DB에 μ €μž₯
for chunk_idx, chunk_content in enumerate(section_chunks):
try:
# μ„Ήμ…˜ 메타데이터λ₯Ό 기본으둜 μ‚¬μš© (chapter 정보 포함)
chunk_metadata = section_metadata.copy()
# DB에 청크 μ €μž₯ (μ„Ήμ…˜ 메타데이터 포함)
chunk = DocumentChunk(
file_id=file_id,
chunk_index=global_chunk_index,
content=chunk_content,
chunk_metadata=json.dumps(chunk_metadata, ensure_ascii=False) # μ„Ήμ…˜ 메타데이터 μ €μž₯
)
db.session.add(chunk)
db.session.flush() # ID 생성
# 벑터 DB에 청크 μΆ”κ°€
if vector_db.add_chunk(
chunk_id=chunk.id,
chunk_content=chunk_content,
file_id=file_id,
chunk_index=global_chunk_index
):
vector_saved_count += 1
saved_count += 1
global_chunk_index += 1
# μ§„ν–‰ 상황 좜λ ₯ (10κ°œλ§ˆλ‹€)
if saved_count % 10 == 0:
print(f"[청크 생성] μ§„ν–‰ 쀑: {saved_count}개 청크 μ €μž₯ 쀑... (DB: {saved_count}, 벑터 DB: {vector_saved_count})")
except Exception as e:
print(f"[청크 생성] κ²½κ³ : 청크 {global_chunk_index} μ €μž₯ 쀑 였λ₯˜: {str(e)}")
import traceback
traceback.print_exc()
continue
db.session.commit()
print(f"[청크 생성] μ™„λ£Œ: {saved_count}개 청크가 λ°μ΄ν„°λ² μ΄μŠ€μ— μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€. (벑터 DB: {vector_saved_count}개)")
# μ €μž₯ 확인
verified_count = DocumentChunk.query.filter_by(file_id=file_id).count()
if verified_count != saved_count:
print(f"[청크 생성] κ²½κ³ : μ €μž₯된 청크 수({saved_count})와 ν™•μΈλœ 청크 수({verified_count})κ°€ μΌμΉ˜ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
else:
print(f"[청크 생성] 검증 μ™„λ£Œ: {verified_count}개 청크가 μ •μƒμ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
return saved_count
except Exception as e:
db.session.rollback()
print(f"[청크 생성] 였λ₯˜: {str(e)}")
import traceback
traceback.print_exc()
return 0
def create_parent_chunk_with_ai(file_id, content, model_name):
    """Generate and persist a ParentChunk for a file by AI-analyzing a web novel.

    Sends the novel text (truncated to 50,000 chars) to either the Gemini API
    (model names starting with 'gemini:' or 'gemini-') or an Ollama server,
    parses the markdown-sectioned response into world_view / characters /
    story / episodes / others, and stores the result as a ParentChunk row,
    replacing any existing one for the same file.

    Args:
        file_id: id of the UploadedFile this parent chunk belongs to.
        content: full novel text to analyze.
        model_name: AI model identifier; selects the Gemini vs. Ollama path.

    Returns:
        The saved ParentChunk instance, or None on any failure (missing model
        name, unconfigured API key, HTTP/timeout errors, empty response,
        DB errors).
    """
    try:
        print(f"[Parent Chunk 생성] 파일 ID {file_id}에 λŒ€ν•œ Parent Chunk 생성 μ‹œμž‘")
        print(f"[Parent Chunk 생성] μ‚¬μš© λͺ¨λΈ: {model_name}")
        print(f"[Parent Chunk 생성] 원본 ν…μŠ€νŠΈ 길이: {len(content)}자")
        # Bail out early when no usable model name was supplied.
        if not model_name or not model_name.strip():
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: λͺ¨λΈλͺ…이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            return None
        # Very long texts are truncated to the first 50,000 characters.
        content_preview = content[:50000] if len(content) > 50000 else content
        if len(content) > 50000:
            print(f"[Parent Chunk 생성] ν…μŠ€νŠΈκ°€ κΈΈμ–΄ μΌλΆ€λ§Œ μ‚¬μš©: {len(content_preview)}자 (전체: {len(content)}자)")
        # Build the analysis prompt. The Korean section headers below ('## 세계관 μ„€λͺ…' etc.)
        # are runtime text the parser further down keys on — do not change them.
        analysis_prompt = f"""λ‹€μŒ μ›Ήμ†Œμ„€ ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜μ—¬ λ‹€μŒ ν•­λͺ©λ“€μ„ μž‘μ„±ν•΄μ£Όμ„Έμš”. 각 ν•­λͺ©μ€ λͺ…ν™•ν•˜κ³  ꡬ체적으둜 μž‘μ„±ν•΄μ£Όμ„Έμš”.
ν…μŠ€νŠΈ λ‚΄μš©:
{content_preview}
μœ„ ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜μ—¬ λ‹€μŒ ν˜•μ‹μœΌλ‘œ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
## 세계관 μ„€λͺ…
[세계관에 λŒ€ν•œ μƒμ„Έν•œ μ„€λͺ…을 μž‘μ„±ν•˜μ„Έμš”. λ°°κ²½, μ„€μ •, κ·œμΉ™ 등을 ν¬ν•¨ν•˜μ„Έμš”.]
## μ£Όμš” 캐릭터 뢄석
[μ£Όμš” λ“±μž₯μΈλ¬Όλ“€μ˜ 이름, μ—­ν• , 성격, νŠΉμ§• 등을 λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”. 각 μΊλ¦­ν„°λ³„λ‘œ κ΅¬λΆ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”.]
## μ£Όμš” μŠ€ν† λ¦¬ 뢄석
[전체적인 μŠ€ν† λ¦¬ 흐름, μ£Όμš” 사건, κ°ˆλ“± ꡬ쑰 등을 λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”.]
## μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석
[μ€‘μš”ν•œ μ—ν”Όμ†Œλ“œλ‚˜ 챕터별 μ£Όμš” λ‚΄μš©μ„ λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”. μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ •λ¦¬ν•˜λ©΄ μ’‹μŠ΅λ‹ˆλ‹€.]
## 기타
[μœ„ μΉ΄ν…Œκ³ λ¦¬μ— ν¬ν•¨λ˜μ§€ μ•Šμ§€λ§Œ μ€‘μš”ν•œ μ •λ³΄λ‚˜ νŠΉμ§• 등을 μž‘μ„±ν•˜μ„Έμš”.]
각 ν•­λͺ©μ„ λͺ…ν™•ν•˜κ²Œ κ΅¬λΆ„ν•˜μ—¬ μž‘μ„±ν•΄μ£Όμ„Έμš”."""
        # Decide between Gemini and Ollama.
        # Gemini model names look like "gemini:<model>" or "gemini-<model>"
        # (the bare "gemini-" prefix is also supported).
        model_name_lower = model_name.lower().strip()
        is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
        print(f"[Parent Chunk 생성] λͺ¨λΈ νƒ€μž… 확인: is_gemini={is_gemini}, model_name={model_name}")
        if is_gemini:
            # --- Gemini API path ---
            # Strip the "gemini:" prefix (case-insensitively) if present;
            # "gemini-..." names are used as-is.
            gemini_model_name = model_name.strip()
            if gemini_model_name.lower().startswith('gemini:'):
                gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
            print(f"[Parent Chunk 생성] Gemini API에 뢄석 μš”μ²­ 전솑 쀑... (λͺ¨λΈ: {gemini_model_name})")
            print(f"[Parent Chunk 생성] 원본 λͺ¨λΈλͺ…: {model_name} -> Gemini λͺ¨λΈλͺ…: {gemini_model_name}")
            gemini_client = get_gemini_client()
            if not gemini_client.is_configured():
                print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
                print(f"[Parent Chunk 생성] 디버그: Gemini ν΄λΌμ΄μ–ΈνŠΈ μƒνƒœ 확인 쀑...")
                # Re-check the stored key to distinguish "no key" from
                # "key present but client not configured" in the logs.
                from app.gemini_client import get_gemini_api_key
                api_key = get_gemini_api_key()
                if api_key:
                    print(f"[Parent Chunk 생성] 디버그: API ν‚€λŠ” μ‘΄μž¬ν•˜μ§€λ§Œ ν΄λΌμ΄μ–ΈνŠΈκ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. (길이: {len(api_key)})")
                else:
                    print(f"[Parent Chunk 생성] 디버그: API ν‚€κ°€ λ°μ΄ν„°λ² μ΄μŠ€μ— μ—†μŠ΅λ‹ˆλ‹€.")
                return None
            print(f"[Parent Chunk 생성] Gemini API ν‚€ 확인 μ™„λ£Œ. API 호좜 μ‹œμž‘...")
            result = gemini_client.generate_response(
                prompt=analysis_prompt,
                model_name=gemini_model_name,
                temperature=0.7,
                max_output_tokens=get_model_token_limit_by_type(model_name or "gemini-1.5-flash", 8192, 'parent_chunk')  # Parent-Chunk-specific token limit
            )
            if result['error']:
                print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API 호좜 μ‹€νŒ¨ - {result['error']}")
                print(f"[Parent Chunk 생성] 디버그: result 객체 λ‚΄μš©: {result}")
                return None
            if not result.get('response'):
                print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API 응닡이 λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
                print(f"[Parent Chunk 생성] 디버그: result 객체 λ‚΄μš©: {result}")
                return None
            analysis_result = result['response']
            print(f"[Parent Chunk 생성] Gemini API 응닡 μˆ˜μ‹  성곡: {len(analysis_result)}자")
        else:
            # --- Ollama API path ---
            print(f"[Parent Chunk 생성] Ollama API에 뢄석 μš”μ²­ 전솑 쀑... (λͺ¨λΈ: {model_name})")
            try:
                # Use the configured input-token limit as Ollama's context window.
                num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input')
                ollama_response = requests.post(
                    f'{OLLAMA_BASE_URL}/api/chat',
                    json={
                        'model': model_name,
                        'messages': [
                            {
                                'role': 'user',
                                'content': analysis_prompt
                            }
                        ],
                        'stream': False,
                        'options': {
                            'num_ctx': num_ctx  # context window sized from the input token limit
                        }
                    },
                    timeout=300  # 5-minute timeout
                )
                if ollama_response.status_code != 200:
                    error_detail = ollama_response.text if ollama_response.text else '상세 정보 μ—†μŒ'
                    if ollama_response.status_code == 404:
                        # 404 from /api/chat usually means the model is not pulled.
                        error_msg = f'Ollama API 였λ₯˜ 404: λͺ¨λΈ "{model_name}"을(λ₯Ό) 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. λͺ¨λΈμ΄ Ollama에 μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.'
                        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
                        print(f"[Parent Chunk 생성] 디버그: λ§Œμ•½ Gemini λͺ¨λΈμ„ μ‚¬μš©ν•˜λ €λ©΄ λͺ¨λΈλͺ…이 'gemini:' λ˜λŠ” 'gemini-'둜 μ‹œμž‘ν•΄μ•Ό ν•©λ‹ˆλ‹€.")
                    else:
                        error_msg = f'Ollama API 였λ₯˜: {ollama_response.status_code} - {error_detail[:200]}'
                        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
                    return None
                response_data = ollama_response.json()
                analysis_result = response_data.get('message', {}).get('content', '')
                print(f"[Parent Chunk 생성] Ollama API 응닡 μˆ˜μ‹  성곡: {len(analysis_result)}자")
            except requests.exceptions.Timeout:
                print(f"[Parent Chunk 생성] ❌ Ollama νƒ€μž„μ•„μ›ƒ: μš”μ²­ μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (5λΆ„)")
                print(f"[Parent Chunk 생성] 파일이 λ„ˆλ¬΄ ν¬κ±°λ‚˜ λͺ¨λΈ 응닡이 느릴 수 μžˆμŠ΅λ‹ˆλ‹€.")
                return None
            except requests.exceptions.ConnectionError:
                print(f"[Parent Chunk 생성] ❌ Ollama μ—°κ²° 였λ₯˜: Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
                print(f"[Parent Chunk 생성] 디버그: Ollama URL: {OLLAMA_BASE_URL}")
                return None
            except requests.exceptions.RequestException as e:
                print(f"[Parent Chunk 생성] ❌ Ollama API 였λ₯˜: {str(e)}")
                print(f"[Parent Chunk 생성] 디버그: Ollama URL: {OLLAMA_BASE_URL}")
                return None
        if not analysis_result:
            print(f"[Parent Chunk 생성] ⚠️ κ²½κ³ : 뢄석 κ²°κ³Όκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
            return None
        print(f"[Parent Chunk 생성] 뢄석 κ²°κ³Ό μˆ˜μ‹  μ™„λ£Œ: {len(analysis_result)}자")
        # --- Parse the AI response into the five ParentChunk fields ---
        world_view = ""
        characters = ""
        story = ""
        episodes = ""
        others = ""
        # Accepted header variants per section; matched by substring below,
        # so minor formatting differences in the AI output still parse.
        sections = {
            'world_view': ['## 세계관 μ„€λͺ…', '## 세계관', '세계관 μ„€λͺ…'],
            'characters': ['## μ£Όμš” 캐릭터 뢄석', '## μ£Όμš” 캐릭터', 'μ£Όμš” 캐릭터 뢄석', '## 캐릭터'],
            'story': ['## μ£Όμš” μŠ€ν† λ¦¬ 뢄석', '## μ£Όμš” μŠ€ν† λ¦¬', 'μ£Όμš” μŠ€ν† λ¦¬ 뢄석', '## μŠ€ν† λ¦¬'],
            'episodes': ['## μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석', '## μ£Όμš” μ—ν”Όμ†Œλ“œ', 'μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석', '## μ—ν”Όμ†Œλ“œ'],
            'others': ['## 기타', '기타']
        }
        lines = analysis_result.split('\n')
        current_section = None
        current_content = []
        for line in lines:
            line_stripped = line.strip()
            # Check whether this line opens a new section.
            section_found = False
            for section_key, section_headers in sections.items():
                for header in section_headers:
                    if header in line_stripped:
                        # Flush the accumulated lines of the previous section.
                        if current_section:
                            if current_section == 'world_view':
                                world_view = '\n'.join(current_content).strip()
                            elif current_section == 'characters':
                                characters = '\n'.join(current_content).strip()
                            elif current_section == 'story':
                                story = '\n'.join(current_content).strip()
                            elif current_section == 'episodes':
                                episodes = '\n'.join(current_content).strip()
                            elif current_section == 'others':
                                others = '\n'.join(current_content).strip()
                        current_section = section_key
                        current_content = []
                        section_found = True
                        break
                if section_found:
                    break
            if not section_found and current_section:
                # Accumulate body lines; markdown headings ('#...') are skipped.
                if line_stripped and not line_stripped.startswith('#'):
                    current_content.append(line)
        # Flush the final section.
        if current_section:
            if current_section == 'world_view':
                world_view = '\n'.join(current_content).strip()
            elif current_section == 'characters':
                characters = '\n'.join(current_content).strip()
            elif current_section == 'story':
                story = '\n'.join(current_content).strip()
            elif current_section == 'episodes':
                episodes = '\n'.join(current_content).strip()
            elif current_section == 'others':
                others = '\n'.join(current_content).strip()
        # Parsing fallback: if no main section matched, keep the whole
        # response in 'others' rather than losing it.
        if not world_view and not characters and not story and not episodes:
            print(f"[Parent Chunk 생성] κ²½κ³ : μ„Ήμ…˜ νŒŒμ‹± μ‹€νŒ¨. 전체 λ‚΄μš©μ„ '기타'에 μ €μž₯ν•©λ‹ˆλ‹€.")
            others = analysis_result.strip()
        # Replace any existing Parent Chunk for this file.
        existing_parent = ParentChunk.query.filter_by(file_id=file_id).first()
        if existing_parent:
            db.session.delete(existing_parent)
            db.session.commit()
            print(f"[Parent Chunk 생성] κΈ°μ‘΄ Parent Chunk μ‚­μ œ μ™„λ£Œ")
        # Persist the new Parent Chunk (empty sections stored as NULL).
        parent_chunk = ParentChunk(
            file_id=file_id,
            world_view=world_view if world_view else None,
            characters=characters if characters else None,
            story=story if story else None,
            episodes=episodes if episodes else None,
            others=others if others else None
        )
        db.session.add(parent_chunk)
        db.session.commit()
        print(f"[Parent Chunk 생성] βœ… μ™„λ£Œ: Parent Chunkκ°€ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        print(f"[Parent Chunk 생성] - 세계관: {len(world_view)}자")
        print(f"[Parent Chunk 생성] - 캐릭터: {len(characters)}자")
        print(f"[Parent Chunk 생성] - μŠ€ν† λ¦¬: {len(story)}자")
        print(f"[Parent Chunk 생성] - μ—ν”Όμ†Œλ“œ: {len(episodes)}자")
        print(f"[Parent Chunk 생성] - 기타: {len(others)}자")
        return parent_chunk
    except requests.exceptions.RequestException as e:
        # NOTE(review): the Ollama path already catches RequestException above,
        # so this outer handler appears unreachable in practice — kept as a
        # defensive net; confirm before removing.
        error_msg = f'Ollama API μ—°κ²° 였λ₯˜: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
    except Exception as e:
        db.session.rollback()
        error_msg = f'Parent Chunk 생성 쀑 였λ₯˜: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
def get_parent_chunks_for_files(file_ids):
    """Fetch the ParentChunk row (if any) for each given file id.

    Returns ParentChunk objects in the same order as ``file_ids``; files
    without a parent chunk are omitted. Lookup failures are swallowed and
    an empty list is returned (best-effort context fetch).
    """
    try:
        if not file_ids:
            return []
        found = []
        for fid in file_ids:
            row = ParentChunk.query.filter_by(file_id=fid).first()
            if row is not None:
                found.append(row)
        return found
    except Exception as e:
        print(f"[Parent Chunk 쑰회] 였λ₯˜: {str(e)}")
        return []
def get_episode_analyses_for_files(file_ids):
    """Fetch the EpisodeAnalysis row (if any) for each given file id.

    Used to surface per-episode summaries as chat context. Returns results
    in ``file_ids`` order, skipping files with no analysis; on any error an
    empty list is returned.
    """
    try:
        if not file_ids:
            return []
        found = []
        for fid in file_ids:
            row = EpisodeAnalysis.query.filter_by(file_id=fid).first()
            if row is not None:
                found.append(row)
        return found
    except Exception as e:
        print(f"[νšŒμ°¨λ³„ 뢄석 쑰회] 였λ₯˜: {str(e)}")
        return []
def get_relevant_graph_data(query, file_ids=None):
    """Collect GraphRAG data (entities, relationships, events) relevant to a query.

    Tokenizes the query into Korean/word tokens and keyword-matches them
    (tokens longer than one character) against graph rows belonging to the
    given files and their continuation uploads.

    Args:
        query: user question text.
        file_ids: list of file ids to search; None/empty yields empty results.

    Returns:
        dict with keys 'entities', 'relationships', 'events' (each a list of
        up to 20 ``to_dict()`` payloads) and 'episodes' (sorted episode titles
        referenced by the matches). All lists are empty on error or when no
        file ids are given.
    """
    try:
        if not file_ids:
            return {
                'entities': [],
                'relationships': [],
                'events': [],
                'episodes': []
            }
        # Tokenize the query (Korean runs and word characters).
        query_words = set(re.findall(r'[κ°€-힣]+|\w+', query.lower()))
        # Only tokens longer than one character count as keywords.
        keywords = [w for w in query_words if len(w) > 1]

        def matches(*fields):
            # True when any keyword occurs in any of the (lowercased) fields.
            return any(kw in field for kw in keywords for field in fields)

        # Include continuation uploads (children) of each selected file.
        expanded_file_ids = list(file_ids)
        for fid in file_ids:
            children = UploadedFile.query.filter_by(parent_file_id=fid).all()
            expanded_file_ids.extend(child.id for child in children)

        # Entities whose name or description mentions a query keyword.
        entities = []
        if query_words:
            candidates = GraphEntity.query.filter(
                GraphEntity.file_id.in_(expanded_file_ids)
            ).all()
            hits = [
                ent for ent in candidates
                if matches(ent.entity_name.lower(), (ent.description or '').lower())
            ]
            entities = hits[:20]  # cap at 20

        # Relationships whose source/target/type/description mentions a keyword.
        relationships = []
        if query_words:
            candidates = GraphRelationship.query.filter(
                GraphRelationship.file_id.in_(expanded_file_ids)
            ).all()
            hits = [
                rel for rel in candidates
                if matches(rel.source.lower(), rel.target.lower(),
                           rel.relationship_type.lower(), (rel.description or '').lower())
            ]
            relationships = hits[:20]  # cap at 20

        # Events whose name/description/location mentions a keyword.
        events = []
        if query_words:
            candidates = GraphEvent.query.filter(
                GraphEvent.file_id.in_(expanded_file_ids)
            ).all()
            hits = [
                ev for ev in candidates
                if matches((ev.event_name or '').lower(), (ev.description or '').lower(),
                           (ev.location or '').lower())
            ]
            events = hits[:20]  # cap at 20

        # Episode titles touched by any match.
        episodes = {ent.episode_title for ent in entities}
        episodes.update(rel.episode_title for rel in relationships)
        episodes.update(ev.episode_title for ev in events)
        print(f"[GraphRAG 검색] κ΄€λ ¨ 데이터 발견: μ—”ν‹°ν‹° {len(entities)}개, 관계 {len(relationships)}개, 사건 {len(events)}개, 회차 {len(episodes)}개")
        return {
            'entities': [ent.to_dict() for ent in entities],
            'relationships': [rel.to_dict() for rel in relationships],
            'events': [ev.to_dict() for ev in events],
            'episodes': sorted(list(episodes))
        }
    except Exception as e:
        print(f"[GraphRAG 검색] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return {
            'entities': [],
            'relationships': [],
            'events': [],
            'episodes': []
        }
def search_relevant_chunks(query, file_ids=None, model_name=None, top_k=5, min_score=1):
    """
    Search chunks relevant to a query (vector search + re-ranking).

    Pipeline:
      1. Vector search retrieves an initial pool of 30 candidate chunks.
      2. A Cross-Encoder re-ranks the pool.
      3. The top ``top_k`` (default 5) chunks are returned as DocumentChunk rows.

    Falls back to keyword search (``search_relevant_chunks_fallback``) when
    the vector search returns nothing or raises.

    Args:
        query: user question text.
        file_ids: optional list of file ids to restrict the search to;
            continuation uploads (children) of those files are included.
        model_name: optional model name to filter files by.
        top_k: number of chunks to return.
        min_score: minimum keyword score (used only by the fallback).
    """
    try:
        vector_db = get_vector_db()
        # Expand selected file ids with their continuation uploads (children).
        # FIX: the previous version ran the child lookup twice — once per
        # selected file and again for selected parent files — re-querying and
        # re-appending the exact same child ids. One pass covers every
        # selected file; duplicates are dropped while preserving order.
        expanded_file_ids = None
        if file_ids:
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            expanded_file_ids = list(dict.fromkeys(expanded_file_ids))
        # Optionally narrow down to files produced by a specific model.
        if model_name and expanded_file_ids:
            filtered_files = UploadedFile.query.filter(
                UploadedFile.id.in_(expanded_file_ids),
                UploadedFile.model_name == model_name
            ).all()
            expanded_file_ids = [f.id for f in filtered_files]
        elif model_name and not expanded_file_ids:
            # No file ids given: filter by model name alone.
            filtered_files = UploadedFile.query.filter_by(model_name=model_name).all()
            expanded_file_ids = [f.id for f in filtered_files]
        # Step 1: vector search for the initial pool of 30 candidates.
        print(f"[벑터 검색] 쿼리: {query[:50]}..., 파일 ID: {expanded_file_ids if expanded_file_ids else 'λͺ¨λ“  파일'}")
        vector_results = vector_db.search_chunks(
            query=query,
            file_ids=expanded_file_ids,
            top_k=30
        )
        if not vector_results:
            print(f"[벑터 검색] κ²°κ³Ό μ—†μŒ, ν‚€μ›Œλ“œ 기반 κ²€μƒ‰μœΌλ‘œ λŒ€μ²΄")
            # Empty vector result: fall back to keyword search.
            return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
        # Step 2: Cross-Encoder re-ranking.
        print(f"[λ¦¬λž­ν‚Ή] {len(vector_results)}개 청크에 λŒ€ν•œ λ¦¬λž­ν‚Ή μ‹œμž‘...")
        reranked_chunks = vector_db.rerank_chunks(
            query=query,
            chunks=vector_results,
            top_k=top_k
        )
        # Step 3: resolve chunk ids back to DocumentChunk rows (missing rows skipped).
        final_chunks = []
        for reranked in reranked_chunks:
            chunk = DocumentChunk.query.get(reranked['chunk_id'])
            if chunk:
                final_chunks.append(chunk)
        print(f"[벑터 검색 + λ¦¬λž­ν‚Ή] μ΅œμ’… {len(final_chunks)}개 청크 λ°˜ν™˜")
        return final_chunks
    except Exception as e:
        print(f"[벑터 검색] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        # Any failure degrades gracefully to keyword search.
        print(f"[벑터 검색] ν‚€μ›Œλ“œ 기반 κ²€μƒ‰μœΌλ‘œ λŒ€μ²΄")
        return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
def search_relevant_chunks_fallback(query, file_ids=None, model_name=None, top_k=25, min_score=1):
    """Keyword-based chunk search, used as a fallback for the vector pipeline.

    Scores each candidate chunk by (a) distinct shared tokens with the query,
    (b) total keyword occurrence frequency, and (c) the share of chunk tokens
    that are query tokens, then returns the ``top_k`` highest-scoring chunks
    with a score of at least ``min_score``.

    Args:
        query: user question text (Korean and word tokens are extracted).
        file_ids: optional file ids to restrict to; continuation uploads
            (children) of those files are included.
        model_name: optional model name to filter files by.
        top_k: maximum number of chunks to return.
        min_score: minimum combined score for a chunk to qualify.

    Returns:
        List of DocumentChunk rows sorted by descending score; empty list
        when the query has no tokens, no chunks match, or an error occurs.
    """
    try:
        # Tokenize the query (Korean runs and word characters).
        query_words = set(re.findall(r'[κ°€-힣]+|\w+', query.lower()))
        if not query_words:
            return []
        query_obj = DocumentChunk.query.join(UploadedFile)
        if file_ids:
            # Include continuation uploads (children) of each selected file.
            # FIX: the previous version ran this child lookup twice (a second
            # loop over selected parent files re-added the same child ids);
            # one pass suffices, with order-preserving de-duplication.
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            expanded_file_ids = list(dict.fromkeys(expanded_file_ids))
            query_obj = query_obj.filter(UploadedFile.id.in_(expanded_file_ids))
        if model_name:
            query_obj = query_obj.filter(UploadedFile.model_name == model_name)
        all_chunks = query_obj.all()
        if not all_chunks:
            return []
        # Score every candidate chunk.
        scored_chunks = []
        for chunk in all_chunks:
            content_lower = chunk.content.lower()
            chunk_words = set(re.findall(r'[κ°€-힣]+|\w+', content_lower))
            # 1. Base score: number of distinct shared tokens.
            common_words = query_words & chunk_words
            base_score = len(common_words)
            # 2. Frequency score: total occurrences of query tokens in the chunk.
            frequency_score = sum(content_lower.count(word) for word in query_words)
            # 3. Ratio score: fraction of chunk tokens that are query tokens.
            ratio_score = len(common_words) / len(chunk_words) * 10 if chunk_words else 0
            # Weighted combination of the three signals.
            final_score = base_score * 2 + frequency_score * 0.5 + ratio_score
            if final_score >= min_score:
                scored_chunks.append((final_score, chunk))
        # Highest score first, then keep the top_k chunks.
        scored_chunks.sort(key=lambda x: x[0], reverse=True)
        return [chunk for score, chunk in scored_chunks[:top_k]]
    except Exception as e:
        print(f"[ν‚€μ›Œλ“œ 검색] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
@main_bp.route('/login', methods=['GET', 'POST'])
def login():
    """Login page: render the form on GET, authenticate on POST.

    Admins are sent to the admin page after login (unless a ``next`` target
    is provided); everyone else goes to the index.
    """
    if current_user.is_authenticated:
        # Already signed in — route by role.
        return redirect(url_for('main.admin' if current_user.is_admin else 'main.index'))
    if request.method == 'POST':
        username = request.form.get('username', '').strip()
        password = request.form.get('password', '')
        if not username or not password:
            flash('μ‚¬μš©μžλͺ…κ³Ό λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.', 'error')
            return render_template('login.html')
        user = User.query.filter_by(username=username).first()
        if user and user.check_password(password) and user.is_active:
            login_user(user)
            user.last_login = datetime.utcnow()
            db.session.commit()
            next_page = request.args.get('next')
            if next_page:
                return redirect(next_page)
            # No explicit destination: admins land on the admin page.
            return redirect(url_for('main.admin' if user.is_admin else 'main.index'))
        flash('μ‚¬μš©μžλͺ… λ˜λŠ” λΉ„λ°€λ²ˆν˜Έκ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.', 'error')
    return render_template('login.html')
@main_bp.route('/logout')
@login_required
def logout():
    """Sign the current user out and return to the login page."""
    logout_user()
    flash('λ‘œκ·Έμ•„μ›ƒλ˜μ—ˆμŠ΅λ‹ˆλ‹€.', 'info')
    return redirect(url_for('main.login'))
@main_bp.route('/')
@login_required
def index():
    """Main chat page (login required)."""
    return render_template('index.html')
@main_bp.route('/webnovels')
@login_required
def webnovels():
    """List page for uploaded web novels (login required)."""
    return render_template('webnovels.html')
@main_bp.route('/admin')
@admin_required
def admin():
    """Admin dashboard listing all users, newest first (admin only)."""
    all_users = User.query.order_by(User.created_at.desc()).all()
    return render_template('admin.html', users=all_users)
@main_bp.route('/admin/messages')
@admin_required
def admin_messages():
    """Admin page for reviewing chat messages (admin only)."""
    return render_template('admin_messages.html')
@main_bp.route('/admin/webnovels')
@admin_required
def admin_webnovels():
    """Admin page for managing web novels (admin only)."""
    return render_template('admin_webnovels.html')
@main_bp.route('/admin/prompts')
@admin_required
def admin_prompts():
    """Admin page for managing prompts (admin only)."""
    return render_template('admin_prompts.html')
@main_bp.route('/admin/settings')
@admin_required
def admin_settings():
    """Admin page for AI settings (API keys, token limits; admin only)."""
    return render_template('admin_settings.html')
@main_bp.route('/admin/files')
@admin_required
def admin_files():
    """Admin page for managing the uploaded file list (admin only)."""
    return render_template('admin_files.html')
@main_bp.route('/api/admin/users', methods=['GET'])
@admin_required
def get_users():
    """Return all user accounts as JSON, newest first (admin only)."""
    try:
        all_users = User.query.order_by(User.created_at.desc()).all()
        payload = {'users': [u.to_dict() for u in all_users]}
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'μ‚¬μš©μž λͺ©λ‘ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users', methods=['POST'])
@admin_required
def create_user():
    """Create a new user account from a JSON payload (admin only).

    Expects: username, password (required); nickname, is_admin (optional).
    """
    try:
        data = request.json
        username = data.get('username', '').strip()
        nickname = data.get('nickname', '').strip()
        password = data.get('password', '')
        is_admin = data.get('is_admin', False)
        # Reject missing credentials and duplicate usernames up front.
        if not username or not password:
            return jsonify({'error': 'μ‚¬μš©μžλͺ…κ³Ό λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        if User.query.filter_by(username=username).first():
            return jsonify({'error': '이미 μ‘΄μž¬ν•˜λŠ” μ‚¬μš©μžλͺ…μž…λ‹ˆλ‹€.'}), 400
        new_user = User(
            username=username,
            nickname=nickname if nickname else None,
            is_admin=is_admin,
            is_active=True
        )
        new_user.set_password(password)
        db.session.add(new_user)
        db.session.commit()
        return jsonify({
            'message': 'μ‚¬μš©μžκ°€ μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'user': new_user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users/<int:user_id>', methods=['PUT'])
@admin_required
def update_user(user_id):
    """Update a user's profile, password, and flags from JSON (admin only).

    Only the fields present in the payload are changed. An admin cannot
    revoke their own admin flag.
    """
    try:
        user = User.query.get_or_404(user_id)
        data = request.json
        # An admin must not strip their own admin privileges
        # (``== False`` deliberately matches an explicit JSON false).
        if user_id == current_user.id and data.get('is_admin') == False:
            return jsonify({'error': '자기 μžμ‹ μ˜ κ΄€λ¦¬μž κΆŒν•œμ„ μ œκ±°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        if 'username' in data:
            wanted = data['username'].strip()
            if wanted != user.username:
                # Usernames must stay unique.
                if User.query.filter_by(username=wanted).first():
                    return jsonify({'error': '이미 μ‘΄μž¬ν•˜λŠ” μ‚¬μš©μžλͺ…μž…λ‹ˆλ‹€.'}), 400
                user.username = wanted
        if 'nickname' in data:
            user.nickname = data['nickname'].strip() if data['nickname'] else None
        if 'password' in data and data['password']:
            user.set_password(data['password'])
        if 'is_admin' in data:
            user.is_admin = data['is_admin']
        if 'is_active' in data:
            user.is_active = data['is_active']
        db.session.commit()
        return jsonify({
            'message': 'μ‚¬μš©μž 정보가 μ„±κ³΅μ μœΌλ‘œ μˆ˜μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž 정보 μˆ˜μ • 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/messages', methods=['GET'])
@admin_required
def get_all_messages():
    """Paginated listing of chat messages across all users (admin only).

    Optional query params: user_id, session_id, message_id, page, per_page.
    """
    try:
        user_id = request.args.get('user_id', type=int)
        session_id = request.args.get('session_id', type=int)
        message_id = request.args.get('message_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        query = ChatMessage.query.join(ChatSession)
        if user_id:
            query = query.filter(ChatSession.user_id == user_id)
        if session_id:
            query = query.filter(ChatMessage.session_id == session_id)
        if message_id:
            query = query.filter(ChatMessage.id == message_id)
        page_obj = query.order_by(ChatMessage.created_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False)
        return jsonify({
            'messages': [m.to_dict() for m in page_obj.items],
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λ©”μ‹œμ§€ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/sessions', methods=['GET'])
@admin_required
def get_all_sessions():
    """Paginated listing of chat sessions across all users (admin only).

    Optional query params: user_id, page, per_page. Each session dict is
    enriched with the owner's username/nickname for the admin UI.
    """
    try:
        user_id = request.args.get('user_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        query = ChatSession.query
        if user_id:
            query = query.filter(ChatSession.user_id == user_id)
        page_obj = query.order_by(ChatSession.updated_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False)
        sessions_data = []
        for sess in page_obj.items:
            entry = sess.to_dict()
            # Attach owner info; sessions with a deleted user show 'Unknown'.
            entry['username'] = sess.user.username if sess.user else 'Unknown'
            entry['nickname'] = sess.user.nickname if sess.user else None
            sessions_data.append(entry)
        return jsonify({
            'sessions': sessions_data,
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users/<int:user_id>', methods=['DELETE'])
@admin_required
def delete_user(user_id):
    """Delete a user account (admin only). Self-deletion is refused."""
    try:
        target = User.query.get_or_404(user_id)
        # Admins may not delete their own account.
        if user_id == current_user.id:
            return jsonify({'error': '자기 μžμ‹ μ„ μ‚­μ œν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        db.session.delete(target)
        db.session.commit()
        return jsonify({'message': 'μ‚¬μš©μžκ°€ μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/gemini-api-key', methods=['GET'])
@admin_required
def get_gemini_api_key():
    """Report whether a Gemini API key is configured (admin only).

    Only a masked prefix (first 8 characters) is returned, never the full key.
    """
    try:
        # Missing SystemConfig rows fall back to an empty string.
        api_key = SystemConfig.get_config('gemini_api_key', '')
        if api_key and len(api_key) > 8:
            masked_key = api_key[:8] + '...'
        else:
            masked_key = ''
        return jsonify({
            'has_api_key': bool(api_key),
            'masked_key': masked_key
        }), 200
    except Exception as e:
        print(f"[Gemini API ν‚€ 쑰회] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν‚€ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/huggingface-token', methods=['GET'])
@admin_required
def get_huggingface_token():
    """Report whether a Hugging Face token is configured (admin only).

    Only a masked prefix (first 8 characters) is returned, never the full token.
    """
    try:
        # Local import: the helper shares this route function's name.
        from app.huggingface_client import get_huggingface_token
        token = get_huggingface_token()
        if token and len(token) > 8:
            masked_token = token[:8] + '...'
        else:
            masked_token = ''
        return jsonify({
            'has_token': bool(token),
            'masked_token': masked_token
        }), 200
    except Exception as e:
        print(f"[Hugging Face 토큰 쑰회] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'토큰 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/model-tokens', methods=['GET'])
@admin_required
def get_model_tokens():
    """Return per-model token-limit settings (input/output/parent_chunk split).

    Collects the model list from Ollama and Gemini (best-effort per
    provider), then for every model reads the configured override and the
    default for each of the three token types. An override of ``None``
    in the response means "no override stored — use the default".

    Fixes: the two bare ``except:`` clauses are narrowed to
    ``except Exception:`` so ``SystemExit``/``KeyboardInterrupt`` still
    propagate, and the triplicated config-reading logic is factored into
    ``_read_configured_tokens``.
    """
    try:
        # 1) Ollama models (the server may be down; treat as "no models").
        ollama_models = []
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models = [model['name'] for model in data.get('models', [])]
        except Exception:
            pass
        # 2) Gemini models, namespaced with the 'gemini:' prefix.
        gemini_models = []
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = [f'gemini:{m}' for m in gemini_client.get_available_models()]
        except Exception:
            pass
        all_models = ollama_models + gemini_models

        def get_default_token_for_model(model_name, token_type='output'):
            """Default token count for a model/token-type pair.

            NOTE: the defaults are currently identical for every provider
            (8192 parent_chunk, 2000 output, 100000 input), so no
            per-provider branching is needed.
            """
            if token_type == 'parent_chunk':
                return 8192
            return 2000 if token_type == 'output' else 100000

        def _read_configured_tokens(config_key, fallback_key=None):
            """Read an integer token override from SystemConfig.

            Returns None when the key is unset or the stored value is not
            an integer. ``fallback_key`` supports the legacy output-token
            key format ("model_token_<model>").
            """
            value = SystemConfig.get_config(config_key)
            if not value and fallback_key:
                value = SystemConfig.get_config(fallback_key)
            if not value:
                return None
            try:
                return int(value)
            except (ValueError, TypeError):
                return None

        model_input_tokens = {}
        model_output_tokens = {}
        model_parent_chunk_tokens = {}
        default_input_tokens = {}
        default_output_tokens = {}
        default_parent_chunk_tokens = {}
        for model_name in all_models:
            # Input tokens.
            default_input_tokens[model_name] = get_default_token_for_model(model_name, 'input')
            model_input_tokens[model_name] = _read_configured_tokens(
                f"model_token_input_{model_name}"
            )
            # Output tokens — with backward-compatible legacy-key fallback.
            default_output_tokens[model_name] = get_default_token_for_model(model_name, 'output')
            model_output_tokens[model_name] = _read_configured_tokens(
                f"model_token_output_{model_name}",
                fallback_key=f"model_token_{model_name}"
            )
            # Parent chunk tokens.
            default_parent_chunk_tokens[model_name] = get_default_token_for_model(model_name, 'parent_chunk')
            model_parent_chunk_tokens[model_name] = _read_configured_tokens(
                f"model_token_parent_chunk_{model_name}"
            )
        return jsonify({
            'models': all_models,
            'input_tokens': model_input_tokens,
            'output_tokens': model_output_tokens,
            'parent_chunk_tokens': model_parent_chunk_tokens,
            'default_input_tokens': default_input_tokens,
            'default_output_tokens': default_output_tokens,
            'default_parent_chunk_tokens': default_parent_chunk_tokens
        }), 200
    except Exception as e:
        return jsonify({'error': f'토큰 수 μ„€μ • 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/model-tokens', methods=['POST'])
@admin_required
def save_model_tokens():
    """Save or delete a per-model token-limit setting.

    Payload: ``model_name`` (required), ``token_type`` ('input', 'output'
    or 'parent_chunk'; defaults to 'output'), ``tokens`` (positive int, or
    null to delete the override and fall back to the default).

    Fixes: validates the request body up front (``is_json`` / empty
    payload) consistently with the other admin POST endpoints instead of
    returning a 500 for non-JSON bodies; tolerates an explicit ``null``
    ``token_type``; deduplicates the three response blocks of the delete
    branch.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json or {}
        # `or ''`/`or 'output'` guards against explicit JSON nulls, which
        # would otherwise crash .strip() with an AttributeError (-> 500).
        model_name = (data.get('model_name') or '').strip()
        token_type = (data.get('token_type') or 'output').strip()
        tokens = data.get('tokens', None)
        if not model_name:
            return jsonify({'error': 'λͺ¨λΈλͺ…을 μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        if token_type not in ('input', 'output', 'parent_chunk'):
            return jsonify({'error': '토큰 νƒ€μž…μ€ "input", "output", λ˜λŠ” "parent_chunk"이어야 ν•©λ‹ˆλ‹€.'}), 400
        # tokens == None means: delete the override and use the default.
        if tokens is None:
            try:
                config_key = f"model_token_{token_type}_{model_name}"
                config = SystemConfig.query.filter_by(key=config_key).first()
                message = None
                if config:
                    db.session.delete(config)
                    db.session.commit()
                    message = f'{model_name} λͺ¨λΈμ˜ {token_type} 토큰 수 섀정이 μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. 기본값을 μ‚¬μš©ν•©λ‹ˆλ‹€.'
                elif token_type == 'output':
                    # Backward compatibility: also try the legacy key format.
                    old_config = SystemConfig.query.filter_by(
                        key=f"model_token_{model_name}"
                    ).first()
                    if old_config:
                        db.session.delete(old_config)
                        db.session.commit()
                        message = f'{model_name} λͺ¨λΈμ˜ 좜λ ₯ 토큰 수 섀정이 μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. 기본값을 μ‚¬μš©ν•©λ‹ˆλ‹€.'
                if message is None:
                    # Nothing stored under either key: already on defaults.
                    message = f'{model_name} λͺ¨λΈμ€ 이미 기본값을 μ‚¬μš©ν•˜κ³  μžˆμŠ΅λ‹ˆλ‹€.'
                return jsonify({
                    'message': message,
                    'model_name': model_name,
                    'token_type': token_type,
                    'tokens': None
                }), 200
            except Exception as e:
                db.session.rollback()
                return jsonify({'error': f'μ„€μ • μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
        # Otherwise: store a positive integer override.
        try:
            tokens = int(tokens)
        except (ValueError, TypeError):
            return jsonify({'error': '토큰 μˆ˜λŠ” μ •μˆ˜μ—¬μ•Ό ν•©λ‹ˆλ‹€.'}), 400
        if tokens < 1:
            return jsonify({'error': '토큰 μˆ˜λŠ” 1 이상이어야 ν•©λ‹ˆλ‹€.'}), 400
        config_key = f"model_token_{token_type}_{model_name}"
        SystemConfig.set_config(config_key, str(tokens), f'{model_name} λͺ¨λΈ {token_type} 토큰 수 μ œν•œ')
        return jsonify({
            'message': f'{model_name} λͺ¨λΈμ˜ {token_type} 토큰 μˆ˜κ°€ {tokens}둜 μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'model_name': model_name,
            'token_type': token_type,
            'tokens': tokens
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[토큰 수 μ €μž₯] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'토큰 수 μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/gemini-api-key', methods=['POST'])
@admin_required
def set_gemini_api_key():
    """Save/update the Google Gemini API key.

    Flow (order matters): validate the JSON payload -> persist the key via
    SystemConfig -> read it back to verify the write -> reset the cached
    Gemini client -> final DB read as a sanity check before reporting
    success. Returns 400 for bad input, 500 when saving or verification
    fails.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'μš”μ²­ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        api_key = data.get('api_key', '').strip()
        if not api_key:
            return jsonify({'error': 'API ν‚€λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        # Persist the key (SystemConfig.set_config creates the table if needed).
        try:
            SystemConfig.set_config(
                key='gemini_api_key',
                value=api_key,
                description='Google Gemini API ν‚€'
            )
            # Immediate read-back to confirm the write reached the DB;
            # a mismatch is only logged here (the final check below decides).
            saved_key = SystemConfig.get_config('gemini_api_key', '')
            if saved_key == api_key:
                print(f"[Gemini API ν‚€ μ €μž₯] 성곡: μ €μž₯ 확인됨 (길이: {len(api_key)}자)")
            else:
                print(f"[Gemini API ν‚€ μ €μž₯] κ²½κ³ : μ €μž₯ ν›„ 확인 μ‹€νŒ¨. μ €μž₯된 κ°’: {saved_key[:20] if saved_key else 'None'}...")
        except Exception as save_error:
            print(f"[Gemini API ν‚€ μ €μž₯] 였λ₯˜: {save_error}")
            import traceback
            traceback.print_exc()
            return jsonify({'error': f'API ν‚€ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(save_error)}'}), 500
        # Invalidate the cached Gemini client so it picks up the new key.
        # Best-effort: a reload failure is logged but does not fail the request.
        try:
            from app.gemini_client import reset_gemini_client
            reset_gemini_client()
            print(f"[Gemini] API ν‚€κ°€ μ—…λ°μ΄νŠΈλ˜μ–΄ ν΄λΌμ΄μ–ΈνŠΈκ°€ μž¬λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            print(f"[Gemini] API ν‚€ μž¬λ‘œλ“œ μ‹€νŒ¨: {e}")
        # Final sanity check: the key must now be readable from the DB,
        # otherwise report the save as failed.
        final_check = SystemConfig.get_config('gemini_api_key', '')
        if not final_check:
            print(f"[Gemini API ν‚€ μ €μž₯] κ²½κ³ : μ €μž₯ ν›„ DBμ—μ„œ 쑰회 μ‹€νŒ¨")
            return jsonify({
                'error': 'API ν‚€ μ €μž₯ ν›„ 확인에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€. λ°μ΄ν„°λ² μ΄μŠ€ 연결을 ν™•μΈν•˜μ„Έμš”.',
                'saved': False
            }), 500
        return jsonify({
            'message': 'Gemini API ν‚€κ°€ μ„±κ³΅μ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'has_api_key': True,
            # Mask the key in the response: only the first 8 chars are shown.
            'masked_key': api_key[:8] + '...' if api_key and len(api_key) > 8 else '',
            'saved': True,
            'config_count': SystemConfig.query.count()  # current number of stored configs
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[Gemini API ν‚€ μ €μž₯] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν‚€ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/huggingface-token', methods=['POST'])
@admin_required
def set_huggingface_token():
    """Save or update the Hugging Face API token."""
    try:
        # Reject non-JSON or empty payloads before touching the DB.
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        payload = request.json
        if not payload:
            return jsonify({'error': 'μš”μ²­ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        token_value = payload.get('token', '').strip()
        if not token_value:
            return jsonify({'error': '토큰을 μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        # Persist (SystemConfig.set_config handles table creation internally).
        SystemConfig.set_config(
            key='huggingface_token',
            value=token_value,
            description='Hugging Face API 토큰'
        )
        # Best-effort client reload so the new token takes effect immediately;
        # a failure here is logged but does not fail the request.
        try:
            from app.huggingface_client import reset_huggingface_token
            reset_huggingface_token()
            print(f"[Hugging Face] 토큰이 μ—…λ°μ΄νŠΈλ˜μ–΄ ν΄λΌμ΄μ–ΈνŠΈκ°€ μž¬λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        except Exception as reload_error:
            print(f"[Hugging Face] 토큰 μž¬λ‘œλ“œ μ‹€νŒ¨: {reload_error}")
        return jsonify({
            'message': 'Hugging Face 토큰이 μ„±κ³΅μ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'has_token': True
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[Hugging Face 토큰 μ €μž₯] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'토큰 μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/ollama/models', methods=['GET'])
@login_required
def get_ollama_models():
    """List models available from Ollama and Gemini.

    By default only models that have at least one trained web novel — an
    original UploadedFile row (parent_file_id=None) tagged with that model
    name — are returned; pass ?all=true to skip the filter. Each entry is
    {'name': ..., 'type': 'ollama'|'gemini'}; Gemini names carry a
    'gemini:' prefix. Provider failures are logged and skipped; a fully
    empty result yields a 500.
    """
    try:
        # ?all=true returns every model without the trained-novel filter.
        show_all = request.args.get('all', 'false').lower() == 'true'
        all_models = []
        # 1. Ollama models (best-effort; failure is logged, not fatal).
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models_raw = [model['name'] for model in data.get('models', [])]
                if show_all:
                    # Return every Ollama model unfiltered.
                    ollama_models = [{'name': model_name, 'type': 'ollama'} for model_name in ollama_models_raw]
                    all_models.extend(ollama_models)
                    print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models)}개 μΆ”κ°€ (전체 λͺ©λ‘)")
                else:
                    # Keep only models with at least one trained original file.
                    filtered_ollama_models = []
                    for model_name in ollama_models_raw:
                        # parent_file_id=None restricts the count to original
                        # uploads (not follow-up/child uploads).
                        file_count = UploadedFile.query.filter_by(
                            model_name=model_name,
                            parent_file_id=None
                        ).count()
                        if file_count > 0:
                            filtered_ollama_models.append({'name': model_name, 'type': 'ollama'})
                            print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ {file_count}개")
                        else:
                            print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ μ—†μŒ, λͺ©λ‘μ—μ„œ μ œμ™Έ")
                    all_models.extend(filtered_ollama_models)
                    print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(filtered_ollama_models)}개 μΆ”κ°€ (전체 {len(ollama_models_raw)}개 쀑 {len(filtered_ollama_models)}개 필터링됨)")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        # 2. Gemini models (only when an API key is configured).
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                if show_all:
                    # Return every Gemini model, namespaced with 'gemini:'.
                    gemini_models_list = [{'name': f'gemini:{model_name}', 'type': 'gemini'} for model_name in gemini_models]
                    all_models.extend(gemini_models_list)
                    print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_models_list)}개 μΆ”κ°€ (전체 λͺ©λ‘)")
                else:
                    # Same trained-novel filter as the Ollama branch.
                    filtered_gemini_models = []
                    for model_name in gemini_models:
                        full_model_name = f'gemini:{model_name}'
                        # Count original uploads trained with this model.
                        file_count = UploadedFile.query.filter_by(
                            model_name=full_model_name,
                            parent_file_id=None
                        ).count()
                        if file_count > 0:
                            filtered_gemini_models.append({'name': full_model_name, 'type': 'gemini'})
                            print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ {file_count}개")
                        else:
                            print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ μ—†μŒ, λͺ©λ‘μ—μ„œ μ œμ™Έ")
                    all_models.extend(filtered_gemini_models)
                    print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(filtered_gemini_models)}개 μΆ”κ°€ (전체 {len(gemini_models)}개 쀑 {len(filtered_gemini_models)}개 필터링됨)")
            else:
                print(f"[λͺ¨λΈ λͺ©λ‘] Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•„ Gemini λͺ¨λΈμ„ 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        if all_models:
            return jsonify({'models': all_models})
        else:
            # Neither provider yielded any model: report as an error.
            return jsonify({'error': 'μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지, λ˜λŠ” Gemini API ν‚€κ°€ μ„€μ •λ˜μ—ˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.', 'models': []}), 500
    except Exception as e:
        return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ„ κ°€μ Έμ˜€λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'models': []}), 500
@main_bp.route('/api/admin/prompts', methods=['GET'])
@admin_required
def get_system_prompt():
    """Return the stored system prompt (empty string when none is set)."""
    try:
        stored_prompt = SystemConfig.get_config('system_prompt', '')
        return jsonify({'prompt': stored_prompt}), 200
    except Exception as e:
        return jsonify({'error': f'ν”„λ‘¬ν”„νŠΈλ₯Ό κ°€μ Έμ˜€λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/prompts', methods=['POST'])
@admin_required
def save_system_prompt():
    """Save the system prompt that is automatically prepended to questions.

    Fix: validates the request payload up front (consistent with the other
    admin POST endpoints such as the API-key routes) instead of failing
    with a 500 when the body is not JSON or is null.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json or {}
        # `or ''` guards against an explicit JSON null prompt value.
        prompt = (data.get('prompt') or '').strip()
        SystemConfig.set_config(
            key='system_prompt',
            value=prompt,
            description='μ§ˆλ¬Έν•  λ•Œ μžλ™μœΌλ‘œ λΆ™μ΄λŠ” μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ'
        )
        return jsonify({
            'message': 'ν”„λ‘¬ν”„νŠΈκ°€ μ„±κ³΅μ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'prompt': prompt
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'ν”„λ‘¬ν”„νŠΈ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/database/status', methods=['GET'])
@admin_required
def get_database_status():
    """Check database connectivity and report basic statistics.

    Detects PostgreSQL vs SQLite from the configured URI, runs probe
    queries (server version, table/user/config counts, current time) and
    returns a status dict. HTTP 200 when connected, 500 otherwise; probe
    failures are captured in status['error'] rather than raised.
    """
    try:
        from flask import current_app
        from sqlalchemy import create_engine, text
        from datetime import datetime
        db_uri = current_app.config['SQLALCHEMY_DATABASE_URI']
        is_postgresql = db_uri.startswith('postgresql://') or db_uri.startswith('postgres://')
        # Mask the credential portion of the URI before exposing it.
        if is_postgresql and '@' in db_uri:
            masked_uri = db_uri.split('@')[0].split('://')[0] + '://***@' + '@'.join(db_uri.split('@')[1:])
        else:
            masked_uri = db_uri
        status = {
            'connected': False,
            'type': 'PostgreSQL' if is_postgresql else 'SQLite',
            'uri_masked': masked_uri,
            'version': None,
            'error': None,
            'test_query': None,
            'table_count': 0,
            'user_count': 0,
            'config_count': 0
        }
        # Connection probe: any failure below marks the DB as disconnected.
        try:
            if is_postgresql:
                # PostgreSQL: open a fresh engine and run raw probe queries.
                engine = create_engine(db_uri)
                with engine.connect() as conn:
                    # Server version string (truncated to 100 chars).
                    result = conn.execute(text("SELECT version()"))
                    version = result.fetchone()[0]
                    status['version'] = version[:100]
                    # Number of tables in the public schema.
                    result = conn.execute(text("""
                        SELECT COUNT(*)
                        FROM information_schema.tables
                        WHERE table_schema = 'public'
                    """))
                    status['table_count'] = result.fetchone()[0]
                    # "user" is a reserved word in PostgreSQL, hence the quoting.
                    result = conn.execute(text("SELECT COUNT(*) FROM \"user\""))
                    status['user_count'] = result.fetchone()[0]
                    # Number of stored system configs.
                    result = conn.execute(text("SELECT COUNT(*) FROM system_config"))
                    status['config_count'] = result.fetchone()[0]
                    # Round-trip test query.
                    result = conn.execute(text("SELECT NOW()"))
                    test_time = result.fetchone()[0]
                    status['test_query'] = f"ν˜„μž¬ μ‹œκ°„: {test_time}"
                    status['connected'] = True
            else:
                # SQLite: reuse the app's engine; counts go through the ORM.
                from sqlalchemy import inspect
                inspector = inspect(db.engine)
                tables = inspector.get_table_names()
                status['table_count'] = len(tables)
                # User count via the ORM.
                user_count = User.query.count()
                status['user_count'] = user_count
                # Config count via the ORM.
                config_count = SystemConfig.query.count()
                status['config_count'] = config_count
                # SQLite version and round-trip test query.
                with db.engine.connect() as conn:
                    result = conn.execute(text("SELECT sqlite_version()"))
                    version = result.fetchone()[0]
                    status['version'] = f"SQLite {version}"
                    result = conn.execute(text("SELECT datetime('now')"))
                    test_time = result.fetchone()[0]
                    status['test_query'] = f"ν˜„μž¬ μ‹œκ°„: {test_time}"
                    status['connected'] = True
        except Exception as e:
            status['error'] = str(e)
            status['connected'] = False
        return jsonify(status), 200 if status['connected'] else 500
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({
            'error': f'λ°μ΄ν„°λ² μ΄μŠ€ μƒνƒœ 확인 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}',
            'connected': False
        }), 500
@main_bp.route('/api/admin/ollama/models', methods=['GET'])
@admin_required
def get_all_ollama_models():
    """Admin variant of the model list: every Ollama and Gemini model,
    unfiltered, each annotated with its trained-web-novel file count.

    Unlike the user-facing /api/ollama/models endpoint, no filtering is
    applied; file_count is informational only. Provider failures are
    logged and skipped; an entirely empty result yields a 500.
    """
    try:
        all_models = []
        # 1. Ollama models, no filtering (failure is logged, not fatal).
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models_raw = [model['name'] for model in data.get('models', [])]
                for model_name in ollama_models_raw:
                    # Informational: original uploads (parent_file_id=None)
                    # trained with this model.
                    file_count = UploadedFile.query.filter_by(
                        model_name=model_name,
                        parent_file_id=None
                    ).count()
                    all_models.append({
                        'name': model_name,
                        'type': 'ollama',
                        'file_count': file_count  # informational only
                    })
                    print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ {file_count}개")
                print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models_raw)}개 μΆ”κ°€")
        except Exception as e:
            print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        # 2. Gemini models, no filtering (requires a configured API key).
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                for model_name in gemini_models:
                    # Gemini names are namespaced with a 'gemini:' prefix.
                    full_model_name = f'gemini:{model_name}'
                    # Informational: original uploads trained with this model.
                    file_count = UploadedFile.query.filter_by(
                        model_name=full_model_name,
                        parent_file_id=None
                    ).count()
                    all_models.append({
                        'name': full_model_name,
                        'type': 'gemini',
                        'file_count': file_count  # informational only
                    })
                    print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - ν•™μŠ΅λœ μ›Ήμ†Œμ„€ {file_count}개")
                print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_models)}개 μΆ”κ°€")
            else:
                print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•„ Gemini λͺ¨λΈμ„ 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            print(f"[κ΄€λ¦¬μž λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        if all_models:
            return jsonify({'models': all_models})
        else:
            # Neither provider yielded any model: report as an error.
            return jsonify({'error': 'μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지, λ˜λŠ” Gemini API ν‚€κ°€ μ„€μ •λ˜μ—ˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.', 'models': []}), 500
    except Exception as e:
        return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ„ κ°€μ Έμ˜€λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'models': []}), 500
@main_bp.route('/api/chat', methods=['POST'])
@login_required
def chat():
"""μ±„νŒ… API μ—”λ“œν¬μΈνŠΈ"""
try:
data = request.json
message = data.get('message', '')
# ν•˜μœ„ ν˜Έν™˜μ„±μ„ μœ„ν•΄ model도 확인 (κΈ°μ‘΄ μ½”λ“œ)
analysis_model = data.get('analysis_model', data.get('model', '')) # 질문 λΆ„μ„μš© λͺ¨λΈ
answer_model = data.get('answer_model', '') # μ΅œμ’… λ‹΅λ³€μš© λͺ¨λΈ
file_ids = [int(fid) for fid in data.get('file_ids', []) if fid] # μ„ νƒν•œ μ›Ήμ†Œμ„€ 파일 ID λͺ©λ‘
session_id = data.get('session_id', None) # λŒ€ν™” μ„Έμ…˜ ID (μ •μˆ˜λ‘œ λ³€ν™˜)
if not message:
return jsonify({'error': 'λ©”μ‹œμ§€κ°€ ν•„μš”ν•©λ‹ˆλ‹€.'}), 400
# λ‹΅λ³€μš© λͺ¨λΈμ΄ μ—†μœΌλ©΄ λΆ„μ„μš© λͺ¨λΈ μ‚¬μš© (ν•˜μœ„ ν˜Έν™˜μ„±)
if not answer_model:
answer_model = analysis_model
# λ‹΅λ³€μš© λͺ¨λΈμ΄ μ—¬μ „νžˆ μ—†μœΌλ©΄ μ—λŸ¬ λ°˜ν™˜
if not answer_model:
return jsonify({'error': '닡변을 생성할 AI λͺ¨λΈμ΄ μ„ νƒλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. "μ‚¬μš© κ°€λŠ₯ν•œ AI λͺ©λ‘"μ—μ„œ 닡변을 생성할 AI λͺ¨λΈμ„ μ„ νƒν•΄μ£Όμ„Έμš”.'}), 400
# λΆ„μ„μš© λͺ¨λΈμ΄ μ„ νƒλœ 경우 RAG 검색 μ§„ν–‰
if analysis_model:
try:
# RAG: 질문과 κ΄€λ ¨λœ 청크 검색
context = ""
use_rag = True # RAG μ‚¬μš© μ—¬λΆ€
if use_rag:
print(f"\n[RAG 검색] 뢄석 λͺ¨λΈ: {analysis_model}, λ‹΅λ³€ λͺ¨λΈ: {answer_model}, 질문: {message[:50]}...")
print(f"[RAG 검색] μ„ νƒλœ 파일 ID: {file_ids if file_ids else 'μ—†μŒ (λͺ¨λ“  파일 검색)'}")
# 1단계: νšŒμ°¨λ³„ 뢄석(EpisodeAnalysis) 쑰회 (νšŒμ°¨λ³„ μš”μ•½ 참쑰용)
episode_analyses = []
if file_ids:
print(f"[RAG 검색 1단계] νšŒμ°¨λ³„ 뢄석 쑰회 μ‹œμž‘...")
episode_analyses = get_episode_analyses_for_files(file_ids)
print(f"[RAG 검색 1단계] νšŒμ°¨λ³„ 뢄석 쑰회 μ™„λ£Œ: {len(episode_analyses)}개 파일")
# 2단계: GraphRAG 데이터 쑰회 (μ—”ν‹°ν‹°, 관계, 사건)
graph_data = None
if file_ids:
print(f"[RAG 검색 2단계] GraphRAG 데이터 쑰회 μ‹œμž‘...")
graph_data = get_relevant_graph_data(
query=message,
file_ids=file_ids
)
print(f"[RAG 검색 2단계] GraphRAG 데이터 쑰회 μ™„λ£Œ: μ—”ν‹°ν‹° {len(graph_data['entities'])}개, 관계 {len(graph_data['relationships'])}개, 사건 {len(graph_data['events'])}개")
# 3단계: 벑터 검색 + λ¦¬λž­ν‚ΉμœΌλ‘œ Child Chunk μ •λ°€ 검색 (뢄석 λͺ¨λΈ μ‚¬μš©)
print(f"[RAG 검색 3단계] 벑터 검색 + λ¦¬λž­ν‚Ή μ‹œμž‘ (뢄석 λͺ¨λΈ: {analysis_model})...")
relevant_chunks = search_relevant_chunks(
query=message,
file_ids=file_ids if file_ids else None,
model_name=analysis_model, # 질문 뢄석은 analysis_model μ‚¬μš©
top_k=5, # λ¦¬λž­ν‚Ή ν›„ μƒμœ„ 5개만 선택
min_score=0.5 # μ΅œμ†Œ 점수 μž„κ³„κ°’
)
print(f"[RAG 검색 3단계] 벑터 검색 + λ¦¬λž­ν‚Ή μ™„λ£Œ: {len(relevant_chunks)}개 청크 (μƒμœ„ 5개)")
# μ»¨ν…μŠ€νŠΈ ꡬ성
context_parts = []
# GraphRAG 데이터 μΆ”κ°€ (μ—”ν‹°ν‹°, 관계, 사건 정보)
if graph_data and (graph_data['entities'] or graph_data['relationships'] or graph_data['events']):
graph_context_parts = []
# μ—”ν‹°ν‹° 정보
if graph_data['entities']:
entity_sections = {}
for entity in graph_data['entities']:
episode = entity.get('episode_title', '기타')
if episode not in entity_sections:
entity_sections[episode] = {'characters': [], 'locations': []}
if entity.get('entity_type') == 'character':
entity_sections[episode]['characters'].append(entity)
elif entity.get('entity_type') == 'location':
entity_sections[episode]['locations'].append(entity)
entity_text = "λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ λ“±μž₯인물 및 μž₯μ†Œ μ •λ³΄μž…λ‹ˆλ‹€:\n\n"
for episode, entities in entity_sections.items():
entity_text += f"=== {episode} ===\n"
if entities['characters']:
entity_text += "인물:\n"
for char in entities['characters']:
entity_text += f"- {char.get('entity_name', '')}"
if char.get('role'):
entity_text += f" (μ—­ν• : {char.get('role')})"
if char.get('description'):
entity_text += f": {char.get('description')}"
entity_text += "\n"
if entities['locations']:
entity_text += "μž₯μ†Œ:\n"
for loc in entities['locations']:
entity_text += f"- {loc.get('entity_name', '')}"
if loc.get('category'):
entity_text += f" (μœ ν˜•: {loc.get('category')})"
if loc.get('description'):
entity_text += f": {loc.get('description')}"
entity_text += "\n"
entity_text += "\n"
graph_context_parts.append(entity_text)
# 관계 정보
if graph_data['relationships']:
rel_sections = {}
for rel in graph_data['relationships']:
episode = rel.get('episode_title', '기타')
if episode not in rel_sections:
rel_sections[episode] = []
rel_sections[episode].append(rel)
rel_text = "λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ 인물/μž₯μ†Œ κ°„μ˜ 관계 μ •λ³΄μž…λ‹ˆλ‹€:\n\n"
for episode, rels in rel_sections.items():
rel_text += f"=== {episode} ===\n"
for rel in rels:
rel_text += f"- {rel.get('source', '')} β†’ {rel.get('target', '')}"
if rel.get('relationship_type'):
rel_text += f" ({rel.get('relationship_type')})"
if rel.get('description'):
rel_text += f": {rel.get('description')}"
if rel.get('event'):
rel_text += f" [κ΄€λ ¨ 사건: {rel.get('event')}]"
rel_text += "\n"
rel_text += "\n"
graph_context_parts.append(rel_text)
# 사건 정보
if graph_data['events']:
event_sections = {}
for event in graph_data['events']:
episode = event.get('episode_title', '기타')
if episode not in event_sections:
event_sections[episode] = []
event_sections[episode].append(event)
event_text = "λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ μ£Όμš” 사건 μ •λ³΄μž…λ‹ˆλ‹€:\n\n"
for episode, events in event_sections.items():
event_text += f"=== {episode} ===\n"
for event in events:
if event.get('event_name'):
event_text += f"- {event.get('event_name')}\n"
if event.get('description'):
event_text += f" μ„€λͺ…: {event.get('description')}\n"
if event.get('participants') and len(event.get('participants', [])) > 0:
event_text += f" κ΄€λ ¨ 인물: {', '.join(event.get('participants', []))}\n"
if event.get('location'):
event_text += f" μž₯μ†Œ: {event.get('location')}\n"
if event.get('significance'):
event_text += f" μ€‘μš”λ„: {event.get('significance')}\n"
event_text += "\n"
graph_context_parts.append(event_text)
if graph_context_parts:
graph_context = "\n\n".join(graph_context_parts)
context_parts.append(f"λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ GraphRAG λ°μ΄ν„°μž…λ‹ˆλ‹€ (μ—”ν‹°ν‹°, 관계, 사건 정보):\n\n{graph_context}")
print(f"[RAG 검색] GraphRAG μ»¨ν…μŠ€νŠΈ μΆ”κ°€: {len(graph_context)}자")
# νšŒμ°¨λ³„ 뢄석 정보 μΆ”κ°€ (νšŒμ°¨λ³„ μš”μ•½ 참쑰용)
if episode_analyses:
episode_context_sections = []
for episode_analysis in episode_analyses:
file = episode_analysis.file
file_info = f"\n=== {file.original_filename} νšŒμ°¨λ³„ 뢄석 ===\n"
if episode_analysis.analysis_content:
episode_context_sections.append(file_info + episode_analysis.analysis_content)
if episode_context_sections:
episode_context = "\n\n".join(episode_context_sections)
context_parts.append(f"λ‹€μŒμ€ μ›Ήμ†Œμ„€μ˜ νšŒμ°¨λ³„ 상세 뢄석 λ‚΄μš©μž…λ‹ˆλ‹€:\n\n{episode_context}")
print(f"[RAG 검색] νšŒμ°¨λ³„ 뢄석 μ»¨ν…μŠ€νŠΈ μΆ”κ°€: {len(episode_context)}자")
# Child Chunk 정보 μΆ”κ°€ (μ •λ°€ 검색 κ²°κ³Ό)
if relevant_chunks:
child_context_parts = []
seen_files = set()
for chunk in relevant_chunks:
file = chunk.file
if file.original_filename not in seen_files:
seen_files.add(file.original_filename)
print(f"[RAG 검색] μ‚¬μš©λœ 파일: {file.original_filename} (λͺ¨λΈ: {file.model_name})")
child_context_parts.append(f"[{file.original_filename} - 청크 {chunk.chunk_index + 1}]\n{chunk.content}")
if child_context_parts:
# μ»¨ν…μŠ€νŠΈ 길이 확인 및 μ΅œμ ν™”
full_child_context = "\n\n".join(child_context_parts)
child_context_length = len(full_child_context)
# Child Chunk μ»¨ν…μŠ€νŠΈκ°€ λ„ˆλ¬΄ κΈΈλ©΄ μΌλΆ€λ§Œ μ‚¬μš© (μ΅œλŒ€ 15000자)
if child_context_length > 15000:
truncated_parts = []
current_length = 0
for part in child_context_parts:
if current_length + len(part) > 15000:
break
truncated_parts.append(part)
current_length += len(part)
full_child_context = "\n\n".join(truncated_parts)
print(f"[RAG 검색] Child Chunk μ»¨ν…μŠ€νŠΈ 길이 쑰절: {child_context_length}자 β†’ {len(full_child_context)}자")
context_parts.append(f"λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ μ›Ήμ†Œμ„€μ˜ ꡬ체적인 λ‚΄μš©μž…λ‹ˆλ‹€ (μ •λ°€ 검색 κ²°κ³Ό, 총 {len(relevant_chunks)}개 청크):\n\n{full_child_context}")
print(f"[RAG 검색] Child Chunk μ»¨ν…μŠ€νŠΈ μΆ”κ°€: {len(full_child_context)}자")
# μ΅œμ’… μ»¨ν…μŠ€νŠΈ ꡬ성
if context_parts:
full_context = "\n\n" + "\n\n---\n\n".join(context_parts) + "\n\n"
# νšŒμ°¨λ³„ 뢄석, GraphRAG, Child Chunk λͺ¨λ‘ μžˆλŠ” 경우
has_graph = graph_data and (graph_data['entities'] or graph_data['relationships'] or graph_data['events'])
if episode_analyses and has_graph and relevant_chunks:
context = f"""λ‹€μŒμ€ μ§ˆλ¬Έμ— λ‹΅ν•˜κΈ° μœ„ν•œ μ›Ήμ†Œμ„€ μ •λ³΄μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- λ¨Όμ € GraphRAG 데이터(μ—”ν‹°ν‹°, 관계, 사건)λ₯Ό ν™•μΈν•˜μ—¬ λ“±μž₯인물, μž₯μ†Œ, 인물 κ°„μ˜ 관계, μ£Όμš” 사건을 νŒŒμ•…ν•˜μ„Έμš”.
- κ·Έ λ‹€μŒ νšŒμ°¨λ³„ 뢄석 λ‚΄μš©μ„ μ΄ν•΄ν•˜μ—¬ 각 회차의 μ£Όμš” μŠ€ν† λ¦¬, λ“±μž₯ 인물, 인물 관계 λ³€ν™”λ₯Ό νŒŒμ•…ν•˜μ„Έμš”.
- λ§ˆμ§€λ§‰μœΌλ‘œ ꡬ체적인 λ‚΄μš©(Child Chunk)을 톡해 μ§ˆλ¬Έμ— λŒ€ν•œ μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜μ„Έμš”.
- μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ 일관성 μžˆλŠ” 닡변을 μž‘μ„±ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
elif episode_analyses and relevant_chunks:
context = f"""λ‹€μŒμ€ μ§ˆλ¬Έμ— λ‹΅ν•˜κΈ° μœ„ν•œ μ›Ήμ†Œμ„€ μ •λ³΄μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- λ¨Όμ € νšŒμ°¨λ³„ 뢄석 λ‚΄μš©μ„ μ΄ν•΄ν•˜μ—¬ 각 회차의 μ£Όμš” μŠ€ν† λ¦¬, λ“±μž₯ 인물, 인물 관계 λ³€ν™”λ₯Ό νŒŒμ•…ν•˜μ„Έμš”.
- κ·Έ λ‹€μŒ ꡬ체적인 λ‚΄μš©(Child Chunk)을 톡해 μ§ˆλ¬Έμ— λŒ€ν•œ μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜μ„Έμš”.
- μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ 일관성 μžˆλŠ” 닡변을 μž‘μ„±ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
elif has_graph and relevant_chunks:
context = f"""λ‹€μŒμ€ μ§ˆλ¬Έμ— λ‹΅ν•˜κΈ° μœ„ν•œ μ›Ήμ†Œμ„€ μ •λ³΄μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- λ¨Όμ € GraphRAG 데이터(μ—”ν‹°ν‹°, 관계, 사건)λ₯Ό ν™•μΈν•˜μ—¬ λ“±μž₯인물, μž₯μ†Œ, 인물 κ°„μ˜ 관계, μ£Όμš” 사건을 νŒŒμ•…ν•˜μ„Έμš”.
- κ·Έ λ‹€μŒ ꡬ체적인 λ‚΄μš©(Child Chunk)을 톡해 μ§ˆλ¬Έμ— λŒ€ν•œ μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜μ„Έμš”.
- μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ 일관성 μžˆλŠ” 닡변을 μž‘μ„±ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
elif episode_analyses and has_graph:
# νšŒμ°¨λ³„ 뢄석과 GraphRAG만 μžˆλŠ” 경우
context = f"""λ‹€μŒμ€ μ›Ήμ†Œμ„€μ˜ νšŒμ°¨λ³„ 상세 뢄석 및 GraphRAG λ°μ΄ν„°μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- GraphRAG 데이터(μ—”ν‹°ν‹°, 관계, 사건)λ₯Ό ν™•μΈν•˜μ—¬ λ“±μž₯인물, μž₯μ†Œ, 인물 κ°„μ˜ 관계, μ£Όμš” 사건을 νŒŒμ•…ν•˜μ„Έμš”.
- νšŒμ°¨λ³„ 뢄석 λ‚΄μš©μ„ μ΄ν•΄ν•˜μ—¬ 각 회차의 μ£Όμš” μŠ€ν† λ¦¬, λ“±μž₯ 인물, 인물 관계 λ³€ν™”λ₯Ό κ³ λ €ν•˜μ—¬ λ‹΅λ³€ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
elif episode_analyses:
# νšŒμ°¨λ³„ λΆ„μ„λ§Œ μžˆλŠ” 경우
context = f"""λ‹€μŒμ€ μ›Ήμ†Œμ„€μ˜ νšŒμ°¨λ³„ 상세 뢄석 λ‚΄μš©μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”. 각 회차의 μ£Όμš” μŠ€ν† λ¦¬, λ“±μž₯ 인물, 인물 관계 λ³€ν™”λ₯Ό κ³ λ €ν•˜μ—¬ λ‹΅λ³€ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
elif has_graph:
# GraphRAG만 μžˆλŠ” 경우
context = f"""λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ GraphRAG λ°μ΄ν„°μž…λ‹ˆλ‹€ (μ—”ν‹°ν‹°, 관계, 사건 정보):
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- GraphRAG 데이터λ₯Ό ν™•μΈν•˜μ—¬ λ“±μž₯인물, μž₯μ†Œ, 인물 κ°„μ˜ 관계, μ£Όμš” 사건을 νŒŒμ•…ν•˜μ„Έμš”.
- μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ 일관성 μžˆλŠ” 닡변을 μž‘μ„±ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
else:
# Child Chunk만 μžˆλŠ” 경우
context = f"""λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ μ›Ήμ†Œμ„€μ˜ ꡬ체적인 λ‚΄μš©μž…λ‹ˆλ‹€:
{full_context}
μœ„ λ‚΄μš©μ„ μΆ©λΆ„νžˆ μ°Έκ³ ν•˜μ—¬ λ‹€μŒ μ§ˆλ¬Έμ— μ •ν™•ν•˜κ³  μƒμ„Έν•˜κ²Œ λ‹΅λ³€ν•΄μ£Όμ„Έμš”. μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
context += message
graph_info = f", GraphRAG: {len(graph_data['entities']) if graph_data else 0}개 μ—”ν‹°ν‹°, {len(graph_data['relationships']) if graph_data else 0}개 관계, {len(graph_data['events']) if graph_data else 0}개 사건" if graph_data else ""
print(f"[RAG 검색] μ΅œμ’… μ»¨ν…μŠ€νŠΈ 생성 μ™„λ£Œ (νšŒμ°¨λ³„ 뢄석: {len(episode_analyses)}개{graph_info}, Child Chunk: {len(relevant_chunks)}개, 총 {len(context)}자)")
else:
# RAG 검색 κ²°κ³Όκ°€ μ—†μœΌλ©΄ κΈ°μ‘΄ 방식 μ‚¬μš©
print(f"[RAG 검색] κ΄€λ ¨ 청크λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. 전체 파일 λ‚΄μš© μ‚¬μš©")
use_rag = False
# RAG 검색 κ²°κ³Όκ°€ μ—†κ±°λ‚˜ λΉ„ν™œμ„±ν™”λœ 경우 κΈ°μ‘΄ 방식 μ‚¬μš©
if not context and not use_rag:
if file_ids:
# μ„ νƒν•œ 파일 ID와 μ΄μ–΄μ„œ μ—…λ‘œλ“œλœ νŒŒμΌλ“€λ„ 포함
expanded_file_ids = list(file_ids)
for file_id in file_ids:
# 원본 파일인 경우 μ΄μ–΄μ„œ μ—…λ‘œλ“œλœ νŒŒμΌλ“€λ„ 포함
child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
expanded_file_ids.extend([child.id for child in child_files])
uploaded_files = UploadedFile.query.filter(
UploadedFile.id.in_(expanded_file_ids),
UploadedFile.model_name == analysis_model
).all()
print(f"[파일 μ‚¬μš©] μ„ νƒλœ 파일 ID둜 쑰회 (μ΄μ–΄μ„œ μ—…λ‘œλ“œ 포함): {len(uploaded_files)}개 파일")
else:
# 파일 IDκ°€ μ—†μœΌλ©΄ ν•΄λ‹Ή λͺ¨λΈμ˜ λͺ¨λ“  파일 μ‚¬μš© (원본 및 μ΄μ–΄μ„œ μ—…λ‘œλ“œ 포함)
uploaded_files = UploadedFile.query.filter_by(model_name=analysis_model).all()
print(f"[파일 μ‚¬μš©] λͺ¨λΈ '{analysis_model}'의 λͺ¨λ“  파일 μ‚¬μš©: {len(uploaded_files)}개 파일")
if uploaded_files:
print(f"[파일 μ‚¬μš©] μ‚¬μš©λ˜λŠ” 파일 λͺ©λ‘:")
for f in uploaded_files:
is_child = f.parent_file_id is not None
prefix = " └─ " if is_child else " - "
print(f"{prefix}{f.original_filename} (λͺ¨λΈ: {f.model_name})")
context_parts = []
for file in uploaded_files:
try:
if os.path.exists(file.file_path):
encoding = 'utf-8'
try:
with open(file.file_path, 'r', encoding=encoding) as f:
file_content = f.read()
except UnicodeDecodeError:
with open(file.file_path, 'r', encoding='cp949') as f:
file_content = f.read()
# 파일 λ‚΄μš©μ΄ λ„ˆλ¬΄ κΈΈλ©΄ μΌλΆ€λ§Œ μ‚¬μš© (μ΅œλŒ€ 20000자둜 증가)
if len(file_content) > 20000:
file_content = file_content[:20000] + "..."
context_parts.append(f"[{file.original_filename}]\n{file_content}")
except Exception as e:
print(f"파일 읽기 였λ₯˜ ({file.original_filename}): {str(e)}")
continue
if context_parts:
context = "\n\n".join(context_parts)
context = f"""λ‹€μŒμ€ ν•™μŠ΅λœ μ›Ήμ†Œμ„€ λ‚΄μš©μž…λ‹ˆλ‹€:
{context}
μœ„ λ‚΄μš©μ„ μ°Έκ³ ν•˜μ—¬ λ‹€μŒ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
# μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ κ°€μ Έμ˜€κΈ°
system_prompt = SystemConfig.get_config('system_prompt', '').strip()
# ν”„λ‘¬ν”„νŠΈ ꡬ성 (μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ + μ»¨ν…μŠ€νŠΈ + μ‚¬μš©μž λ©”μ‹œμ§€)
prompt_parts = []
if system_prompt:
prompt_parts.append(system_prompt)
if context:
prompt_parts.append(context)
prompt_parts.append(message)
full_prompt = "\n\n".join(prompt_parts)
if system_prompt:
print(f"[ν”„λ‘¬ν”„νŠΈ] μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ 적용: {len(system_prompt)}자")
# μ΅œμ’… λ‹΅λ³€ 생성은 answer_model μ‚¬μš©
if not answer_model:
return jsonify({'error': 'λ‹΅λ³€μš© λͺ¨λΈμ΄ μ„ νƒλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'}), 400
# λͺ¨λΈ νƒ€μž… 확인 (Gemini λ˜λŠ” Ollama)
is_gemini = answer_model.startswith('gemini:')
print(f"[μ΅œμ’… λ‹΅λ³€ 생성] λ‹΅λ³€ λͺ¨λΈ: {answer_model}, ν”„λ‘¬ν”„νŠΈ 길이: {len(full_prompt)}자")
if is_gemini:
# Gemini API 호좜
gemini_model_name = answer_model.replace('gemini:', '')
print(f"[Gemini] λͺ¨λΈ: {gemini_model_name}, 질문: {message[:50]}...")
gemini_client = get_gemini_client()
if not gemini_client.is_configured():
return jsonify({'error': 'Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. GEMINI_API_KEY ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•˜μ„Έμš”.'}), 500
result = gemini_client.generate_response(
prompt=full_prompt,
model_name=gemini_model_name,
temperature=0.7,
max_output_tokens=get_model_token_limit(gemini_model_name or "gemini-1.5-flash", 8192) # μ €μž₯된 토큰 수 μ‚¬μš©
)
if result['error']:
return jsonify({'error': result['error']}), 500
response_text = result.get('response', '').strip()
if not response_text:
print(f"[μ±„νŒ…] Gemini 응닡이 λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€. result: {result}")
response_text = '응닡을 생성할 수 μ—†μ—ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.'
else:
# Ollama API 호좜
# Ollama μ„œλ²„ μ—°κ²° 확인
try:
# κ°„λ‹¨ν•œ μ—°κ²° ν…ŒμŠ€νŠΈ
test_response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
if test_response.status_code != 200:
return jsonify({'error': f'Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€. (μƒνƒœ μ½”λ“œ: {test_response.status_code}) Ollamaκ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”. Ollama URL: {OLLAMA_BASE_URL}'}), 503
except requests.exceptions.ConnectionError:
return jsonify({'error': f'Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”. Ollama URL: {OLLAMA_BASE_URL}'}), 503
except Exception as e:
return jsonify({'error': f'Ollama μ„œλ²„ μ—°κ²° 확인 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}. Ollama URL: {OLLAMA_BASE_URL}'}), 503
# μž…λ ₯ 토큰 수λ₯Ό num_ctx둜 μ‚¬μš©
num_ctx = get_model_token_limit_by_type(answer_model, 100000, 'input')
num_predict = get_model_token_limit_by_type(answer_model, 8192, 'output')
ollama_response = requests.post(
f'{OLLAMA_BASE_URL}/api/generate',
json={
'model': answer_model, # λ‹΅λ³€ λͺ¨λΈ μ‚¬μš©
'prompt': full_prompt,
'stream': False,
'options': {
'num_ctx': num_ctx, # μž…λ ₯ 토큰 수λ₯Ό μ»¨ν…μŠ€νŠΈ μœˆλ„μš°λ‘œ μ‚¬μš©
'num_predict': num_predict # 좜λ ₯ 토큰 수
}
},
timeout=120 # 파일이 λ§Žμ„ 수 μžˆμœΌλ―€λ‘œ νƒ€μž„μ•„μ›ƒ 증가
)
if ollama_response.status_code != 200:
# 였λ₯˜ 상세 정보 κ°€μ Έμ˜€κΈ°
try:
error_detail = ollama_response.json().get('error', ollama_response.text[:200])
except:
error_detail = ollama_response.text[:200] if ollama_response.text else '상세 정보 μ—†μŒ'
if ollama_response.status_code == 404:
error_msg = f'λͺ¨λΈ "{answer_model}"을(λ₯Ό) 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. λͺ¨λΈμ΄ Ollama에 μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”. (였λ₯˜: {error_detail})'
else:
error_msg = f'Ollama μ„œλ²„ 였λ₯˜: {ollama_response.status_code} (였λ₯˜: {error_detail})'
return jsonify({'error': error_msg}), ollama_response.status_code
ollama_data = ollama_response.json()
response_text = ollama_data.get('response', '').strip()
if not response_text:
print(f"[μ±„νŒ…] Ollama 응닡이 λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€. ollama_data: {ollama_data}")
response_text = '응닡을 생성할 수 μ—†μ—ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.'
# λŒ€ν™” μ„Έμ…˜μ— λ©”μ‹œμ§€ μ €μž₯ (Gemini와 Ollama 곡톡)
session_id = data.get('session_id')
session_dict = None
if session_id:
try:
session = ChatSession.query.filter_by(
id=session_id,
user_id=current_user.id
).first()
if session:
# μ‚¬μš©μž λ©”μ‹œμ§€κ°€ 이미 μ €μž₯λ˜μ–΄ μžˆλŠ”μ§€ 확인 (쀑볡 λ°©μ§€)
# κ°€μž₯ 졜근 λ©”μ‹œμ§€λ₯Ό ν™•μΈν•˜μ—¬ 쀑볡 μ €μž₯ λ°©μ§€
latest_user_msg = ChatMessage.query.filter_by(
session_id=session_id,
role='user'
).order_by(ChatMessage.created_at.desc()).first()
# 졜근 10초 이내에 같은 λ‚΄μš©μ˜ λ©”μ‹œμ§€κ°€ μ—†μœΌλ©΄ μ €μž₯
should_save = True
if latest_user_msg:
time_diff = (datetime.utcnow() - latest_user_msg.created_at).total_seconds()
if latest_user_msg.content == message and time_diff < 10:
should_save = False
print(f"[쀑볡 λ°©μ§€] 졜근 {time_diff:.2f}초 전에 같은 λ©”μ‹œμ§€κ°€ μ €μž₯λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€. μ €μž₯을 κ±΄λ„ˆλœλ‹ˆλ‹€.")
if should_save:
user_msg = ChatMessage(
session_id=session_id,
role='user',
content=message
)
db.session.add(user_msg)
print(f"[λ©”μ‹œμ§€ μ €μž₯] μ‚¬μš©μž λ©”μ‹œμ§€ μ €μž₯: {message[:50]}...")
# μ„Έμ…˜ 제λͺ© μ—…λ°μ΄νŠΈ (첫 μ‚¬μš©μž λ©”μ‹œμ§€μΈ 경우)
title_needs_update = (
not session.title or
session.title.strip() == '' or
session.title == 'μƒˆ λŒ€ν™”'
)
if title_needs_update and message.strip():
# λ©”μ‹œμ§€ λ‚΄μš©μ„ 제λͺ©μœΌλ‘œ μ‚¬μš© (μ΅œλŒ€ 30자)
title = message.strip()[:30]
if len(message.strip()) > 30:
title += '...'
session.title = title
print(f"[μ„Έμ…˜ 제λͺ©] μ—…λ°μ΄νŠΈ: '{title}' (원본 길이: {len(message.strip())}자)")
elif title_needs_update:
print(f"[μ„Έμ…˜ 제λͺ©] λ©”μ‹œμ§€κ°€ λΉ„μ–΄μžˆμ–΄ 제λͺ©μ„ μ—…λ°μ΄νŠΈν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
else:
print(f"[λ©”μ‹œμ§€ μ €μž₯] 쀑볡 λ©”μ‹œμ§€λ‘œ 인해 μ €μž₯을 κ±΄λ„ˆλœλ‹ˆλ‹€.")
# AI 응닡 μ €μž₯
ai_msg = ChatMessage(
session_id=session_id,
role='ai',
content=response_text
)
db.session.add(ai_msg)
# μ„Έμ…˜ λͺ¨λΈ 정보 μ—…λ°μ΄νŠΈ (첫 λ©”μ‹œμ§€μΈ 경우 λ˜λŠ” λ³€κ²½λœ 경우)
if not session.analysis_model or session.analysis_model != analysis_model:
session.analysis_model = analysis_model
if not session.answer_model or session.answer_model != answer_model:
session.answer_model = answer_model
# ν•˜μœ„ ν˜Έν™˜μ„±μ„ μœ„ν•΄ model_name도 μ—…λ°μ΄νŠΈ
if not session.model_name:
session.model_name = answer_model or analysis_model
session.updated_at = datetime.utcnow()
db.session.commit()
# μ„Έμ…˜ 정보λ₯Ό 응닡에 포함 (제λͺ© μ—…λ°μ΄νŠΈ 반영)
session_dict = session.to_dict()
except Exception as e:
print(f"λ©”μ‹œμ§€ μ €μž₯ 였λ₯˜: {str(e)}")
db.session.rollback()
session_dict = None
# 응닡이 λΉ„μ–΄μžˆμœΌλ©΄ κΈ°λ³Έ λ©”μ‹œμ§€ μ‚¬μš©
if not response_text or not response_text.strip():
print(f"[μ±„νŒ…] μ΅œμ’… 응닡이 λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€. κΈ°λ³Έ λ©”μ‹œμ§€λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€.")
response_text = '응닡을 생성할 수 μ—†μ—ˆμŠ΅λ‹ˆλ‹€. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”.'
print(f"[μ±„νŒ…] μ΅œμ’… 응닡 길이: {len(response_text)}자, 미리보기: {response_text[:100]}...")
response_data = {'response': response_text, 'session_id': session_id}
if session_dict:
response_data['session'] = session_dict
return jsonify(response_data)
except requests.exceptions.ConnectionError as e:
error_msg = f'Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”. (URL: {OLLAMA_BASE_URL})'
print(f"[μ±„νŒ…] Ollama μ—°κ²° 였λ₯˜: {str(e)}")
return jsonify({'error': error_msg}), 503
except requests.exceptions.Timeout:
return jsonify({'error': '응닡 μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. 더 짧은 λ©”μ‹œμ§€λ₯Ό μ‹œλ„ν•΄λ³΄μ„Έμš”.'}), 504
except Exception as e:
error_msg = f'Ollama 톡신 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'
print(f"[μ±„νŒ…] Ollama 톡신 였λ₯˜ 상세: {str(e)}")
import traceback
traceback.print_exc()
return jsonify({'error': error_msg}), 500
else:
# λͺ¨λΈμ΄ μ„ νƒλ˜μ§€ μ•Šμ€ 경우 κΈ°λ³Έ 응닡
response_text = f"μ•ˆλ…•ν•˜μ„Έμš”! '{message}'에 λŒ€ν•œ 닡변을 μ€€λΉ„ μ€‘μž…λ‹ˆλ‹€.\n\n쒌츑 ν•˜λ‹¨μ—μ„œ 둜컬 AI λͺ¨λΈμ„ μ„ νƒν•˜λ©΄ 더 μ •ν™•ν•œ 닡변을 μ œκ³΅ν•  수 μžˆμŠ΅λ‹ˆλ‹€."
return jsonify({'response': response_text})
except Exception as e:
return jsonify({'error': f'μ±„νŒ… 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/upload', methods=['POST'])
@login_required
def upload_file():
    """Upload a web-novel file.

    Validates the multipart request (file presence, model selection, optional
    parent file for "continue upload"), enforces size/extension limits, saves
    the file under UPLOAD_FOLDER with a collision-free name, records it in the
    database, and returns metadata including the episode count for text files.
    Chunk creation / episode analysis / graph extraction are triggered by
    separate APIs afterwards.

    Returns:
        JSON with file metadata and 200 on success; JSON ``error`` plus a
        ``step`` marker and an appropriate 4xx/5xx status on failure.
    """
    import sys
    import traceback

    # Timestamped print that flushes immediately so log lines appear in order.
    def log_print(*args, **kwargs):
        from datetime import datetime
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        print(f"[{timestamp}]", *args, **kwargs)
        sys.stdout.flush()

    try:
        log_print(f"\n{'='*60}")
        log_print(f"=== 파일 μ—…λ‘œλ“œ μš”μ²­ μ‹œμž‘ ===")
        log_print(f"μš”μ²­ URL: {request.url}")
        log_print(f"μš”μ²­ λ©”μ„œλ“œ: {request.method}")
        log_print(f"Content-Type: {request.content_type}")
        log_print(f"Content-Length: {request.content_length}")
        log_print(f"Remote Address: {request.remote_addr}")
        log_print(f"Headers: {dict(request.headers)}")
        log_print(f"Form 데이터 ν‚€: {list(request.form.keys())}")
        log_print(f"Files ν‚€: {list(request.files.keys())}")
        log_print(f"μ‚¬μš©μž: {current_user.username if current_user and current_user.is_authenticated else 'None'}")
        log_print(f"μ‚¬μš©μž 인증 μƒνƒœ: {current_user.is_authenticated if current_user else False}")
        log_print(f"{'='*60}\n")

        # Make sure the upload folder exists before touching the disk.
        try:
            ensure_upload_folder()
            log_print(f"[1/8] μ—…λ‘œλ“œ 폴더 확인 μ™„λ£Œ: {UPLOAD_FOLDER}")
        except Exception as e:
            error_msg = f'μ—…λ‘œλ“œ 폴더λ₯Ό μ€€λΉ„ν•  수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'folder_check'}), 500

        if 'file' not in request.files:
            error_msg = '파일이 μ—†μŠ΅λ‹ˆλ‹€.'
            log_print(f"[ERROR] {error_msg}")
            log_print(f"μ‚¬μš© κ°€λŠ₯ν•œ ν‚€: {list(request.files.keys())}")
            return jsonify({'error': error_msg, 'step': 'file_check'}), 400

        file = request.files['file']
        model_name = request.form.get('model_name', '').strip()
        parent_file_id = request.form.get('parent_file_id', None)  # original file ID when continuing an upload
        log_print(f"[2/8] 파일 μˆ˜μ‹ : {file.filename if file else 'None'}")
        log_print(f"[2/8] λͺ¨λΈλͺ…: {model_name if model_name else 'None (λΉ„μ–΄μžˆμŒ)'}")
        log_print(f"[2/8] μ΄μ–΄μ„œ μ—…λ‘œλ“œ: {parent_file_id if parent_file_id else 'μ•„λ‹ˆμ˜€'}")

        if file.filename == '':
            error_msg = '파일λͺ…이 μ—†μŠ΅λ‹ˆλ‹€.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'filename_check'}), 400

        # A model must be selected for every upload.
        if not model_name:
            error_msg = 'AI λͺ¨λΈμ„ μ„ νƒν•΄μ£Όμ„Έμš”.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'model_check'}), 400

        # Validate parent_file_id (continue-upload case).
        parent_file = None
        if parent_file_id:
            try:
                parent_file_id = int(parent_file_id)
                parent_file = UploadedFile.query.filter_by(
                    id=parent_file_id,
                    uploaded_by=current_user.id
                ).first()
                if not parent_file:
                    error_msg = '원본 νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'parent_file_check'}), 404
                # Continuation must target a file trained for the same model.
                if parent_file.model_name != model_name:
                    error_msg = '같은 λͺ¨λΈμ˜ νŒŒμΌμ—λ§Œ μ΄μ–΄μ„œ μ—…λ‘œλ“œν•  수 μžˆμŠ΅λ‹ˆλ‹€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'model_mismatch'}), 400
                log_print(f"[μ΄μ–΄μ„œ μ—…λ‘œλ“œ] 원본 파일: {parent_file.original_filename} (ID: {parent_file_id})")
            except (ValueError, TypeError):
                # BUG FIX: log the offending value *before* discarding it
                # (previously this always logged None).
                log_print(f"[κ²½κ³ ] 잘λͺ»λœ parent_file_id: {parent_file_id}")
                parent_file_id = None

        log_print(f"[3/8] μ—…λ‘œλ“œ μ‹œλ„: {file.filename}, λͺ¨λΈ: {model_name}")
        if not allowed_file(file.filename):
            error_msg = f'ν—ˆμš©λ˜μ§€ μ•Šμ€ 파일 ν˜•μ‹μž…λ‹ˆλ‹€. ν—ˆμš© ν˜•μ‹: {", ".join(ALLOWED_EXTENSIONS)}'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'file_type_check'}), 400
        log_print(f"[4/8] 파일 ν˜•μ‹ 확인 μ™„λ£Œ: {file.filename}")

        # Determine the upload size up front (Content-Length header first).
        file_size = 0
        size_known = False  # True only when a pre-save size check is meaningful
        try:
            if request.content_length:
                file_size = request.content_length
                size_known = True
                print(f"Content-Length둜 파일 크기 확인: {file_size} bytes")
            else:
                # No Content-Length header: try to measure the stream itself.
                try:
                    current_pos = file.tell()  # remember current offset
                    file.seek(0, os.SEEK_END)
                    file_size = file.tell()
                    file.seek(current_pos, os.SEEK_SET)  # restore offset
                    size_known = True
                    print(f"파일 슀트림으둜 크기 확인: {file_size} bytes")
                except (AttributeError, IOError, OSError) as e:
                    print(f"파일 크기 확인 μ‹€νŒ¨ (μ €μž₯ ν›„ 확인): {str(e)}")
                    file_size = 0  # verify after saving instead
        except Exception as e:
            print(f"파일 크기 확인 였λ₯˜: {str(e)}")
            file_size = 0  # verify after saving instead
            size_known = False

        # Pre-save size validation (only possible when the size is known).
        if size_known:
            if file_size > 100 * 1024 * 1024:  # 100MB upload limit
                print(f"파일 크기 초과: {file_size} bytes")
                return jsonify({'error': '파일 크기가 λ„ˆλ¬΄ ν½λ‹ˆλ‹€. μ΅œλŒ€ 100MBκΉŒμ§€ μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.'}), 400
            if file_size == 0:
                # BUG FIX: the empty-file rejection was previously gated behind
                # a `file_size > 0` style guard and could not fire pre-save.
                print("빈 파일 μ—…λ‘œλ“œ μ‹œλ„")
                return jsonify({'error': '빈 νŒŒμΌμ€ μ—…λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400

        # Build a safe, collision-free stored filename.
        original_filename = file.filename
        filename = secure_filename(original_filename)
        if not filename:
            return jsonify({'error': 'μœ νš¨ν•˜μ§€ μ•Šμ€ 파일λͺ…μž…λ‹ˆλ‹€.'}), 400
        # BUG FIX: embed the sanitized filename (and thus its extension) in the
        # stored name; previously a literal placeholder was appended instead,
        # which lost the extension and produced identical suffixes.
        unique_filename = f"{uuid.uuid4().hex}_{filename}"
        file_path = os.path.join(UPLOAD_FOLDER, unique_filename)

        # Save the file to disk.
        try:
            log_print(f"[6/8] 파일 μ €μž₯ μ‹œλ„: {file_path}")
            file.save(file_path)
            log_print(f"[6/8] 파일 μ €μž₯ μ™„λ£Œ: {file_path}")
        except PermissionError as e:
            # BUG FIX: PermissionError subclasses OSError/IOError, so it must
            # be caught before the IOError handler or this branch is dead.
            error_msg = f'파일 μ €μž₯ κΆŒν•œ 였λ₯˜: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ PermissionError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save_permission'}), 500
        except IOError as e:
            error_msg = f'파일 μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ IOError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500
        except Exception as e:
            error_msg = f'파일 μ €μž₯ μ‹€νŒ¨: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ Exception: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500

        # Verify the saved file actually exists and is non-empty.
        if not os.path.exists(file_path):
            error_msg = '파일이 μ €μž₯λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
            print(f"파일 쑴재 확인 μ‹€νŒ¨: {file_path}")
            return jsonify({'error': error_msg}), 500
        saved_file_size = os.path.getsize(file_path)
        if saved_file_size == 0:
            os.remove(file_path)  # clean up the empty file
            error_msg = '파일이 μ œλŒ€λ‘œ μ €μž₯λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
            print(f"빈 파일 μ‚­μ œ: {file_path}")
            return jsonify({'error': error_msg}), 500
        print(f"μ €μž₯된 파일 크기: {saved_file_size} bytes")

        # Record the upload in the database.
        try:
            log_print(f"[7/8] λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ μ‹œλ„: {original_filename}")
            uploaded_file = UploadedFile(
                filename=unique_filename,
                original_filename=original_filename,
                file_path=file_path,
                file_size=saved_file_size,
                model_name=model_name,  # already validated above
                is_public=False,  # default: private
                uploaded_by=current_user.id,
                parent_file_id=parent_file_id if parent_file else None  # continue-upload link
            )
            db.session.add(uploaded_file)
            db.session.flush()  # flush to obtain the new ID
            log_print(f"[7/8] λ°μ΄ν„°λ² μ΄μŠ€ flush μ™„λ£Œ, 파일 ID: {uploaded_file.id}")
            # Only the file record is committed here; chunking is a separate API.
            db.session.commit()
            log_print(f"[8/8] λ°μ΄ν„°λ² μ΄μŠ€ 컀밋 μ™„λ£Œ: {original_filename}")
            log_print(f"[8/8] μ—°κ²°λœ λͺ¨λΈ: {model_name}")
            log_print(f"{'='*60}")
            log_print(f"=== 파일 μ—…λ‘œλ“œ μ™„λ£Œ (처리 λŒ€κΈ° 쀑) ===")
            log_print(f"{'='*60}\n")
            log_print(f"[8/8] μ—…λ‘œλ“œ μ™„λ£Œ - 파일: {original_filename}, λͺ¨λΈ: {model_name}, 크기: {saved_file_size} bytes")
            log_print(f"[8/8] λ‹€μŒ 단계: Parent Chunk 생성, Chunk 생성, 회차 뢄석, Graph Extraction을 λ³„λ„λ‘œ μ§„ν–‰ν•©λ‹ˆλ‹€.")

            # Count episodes (section split) for text files; requires reading
            # the file back with the same utf-8 -> cp949 fallback used elsewhere.
            episode_count = 0
            if original_filename.lower().endswith(('.txt', '.md')):
                try:
                    encoding = 'utf-8'
                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            content = f.read()
                    except UnicodeDecodeError:
                        with open(file_path, 'r', encoding='cp949') as f:
                            content = f.read()
                    sections = split_content_by_episodes(content)
                    # Exclude the '#μž‘ν’ˆμ„€λͺ…' (work description) section from the count.
                    episode_sections = [s for s in sections if s[0] != 'μž‘ν’ˆμ„€λͺ…']
                    episode_count = len(episode_sections)
                    log_print(f"[8/8] 회차 수 계산: {episode_count}개 회차")
                except Exception as e:
                    log_print(f"[8/8] 회차 수 계산 였λ₯˜: {str(e)}")
                    episode_count = 0

            return jsonify({
                'message': f'파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (λͺ¨λΈ: {model_name})',
                'file': uploaded_file.to_dict(),
                'model_name': model_name,
                'file_id': uploaded_file.id,
                'episode_count': episode_count,  # number of episodes detected
                'needs_processing': original_filename.lower().endswith(('.txt', '.md'))  # whether chunking etc. is still required
            }), 200
        except Exception as e:
            db.session.rollback()
            error_msg = f'λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ 였λ₯˜: {error_msg}")
            traceback.print_exc()
            # The DB write failed: remove the already-saved file so the two stay in sync.
            if 'file_path' in locals() and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    log_print(f"였λ₯˜λ‘œ μΈν•œ 파일 μ‚­μ œ: {file_path}")
                except Exception as del_e:
                    log_print(f"파일 μ‚­μ œ μ‹€νŒ¨: {str(del_e)}")
            return jsonify({'error': error_msg, 'step': 'database_save'}), 500
    except Exception as e:
        db.session.rollback()
        error_msg = str(e)
        error_type = type(e).__name__
        log_print(f"\n{'='*60}")
        log_print(f"=== μ—…λ‘œλ“œ 처리 쀑 μ˜ˆμ™Έ λ°œμƒ ===")
        log_print(f"μ˜ˆμ™Έ νƒ€μž…: {error_type}")
        log_print(f"μ—λŸ¬ λ©”μ‹œμ§€: {error_msg}")
        traceback.print_exc()
        log_print(f"{'='*60}\n")
        # Map "request entity too large" failures to a 413 with a friendly message.
        if '413' in error_msg or 'Request Entity Too Large' in error_msg or error_type == 'RequestEntityTooLarge':
            return jsonify({'error': '파일 크기가 λ„ˆλ¬΄ ν½λ‹ˆλ‹€. μ΅œλŒ€ 100MBκΉŒμ§€ μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.', 'step': 'file_size'}), 413
        return jsonify({'error': f'파일 μ—…λ‘œλ“œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {error_type}: {error_msg}', 'step': 'exception'}), 500
@main_bp.route('/api/files', methods=['GET'])
@login_required
def get_files():
    """List uploaded original files, each with its continuation uploads.

    Non-admin users (or callers passing ``public_only=true``) only see public
    originals.  The response also carries per-model aggregate statistics.
    """
    try:
        model_name = request.args.get('model_name', None)
        public_only = request.args.get('public_only', 'false').lower() == 'true'  # restrict to public files
        # Originals are rows with parent_file_id == None; admins may see all.
        restrict_to_public = public_only or (not current_user.is_admin)
        if restrict_to_public:
            query = UploadedFile.query.filter_by(parent_file_id=None, is_public=True)
            print(f"[파일 쑰회] 곡개 파일만 쑰회 (μ‚¬μš©μž: {current_user.username}, κ΄€λ¦¬μž: {current_user.is_admin})")
        else:
            query = UploadedFile.query.filter_by(parent_file_id=None)
            print(f"[파일 쑰회] λͺ¨λ“  파일 쑰회 (μ‚¬μš©μž: {current_user.username}, κ΄€λ¦¬μž: {current_user.is_admin})")

        # Count before applying the model filter, for diagnostics.
        total_before_filter = query.count()
        print(f"[파일 쑰회] 필터링 μ „ 파일 수: {total_before_filter}개")
        if model_name:
            query = query.filter_by(model_name=model_name)
            print(f"[파일 쑰회] λͺ¨λΈ '{model_name}' 필터링")
        files = query.order_by(UploadedFile.uploaded_at.desc()).all()

        print(f"[파일 쑰회] 필터링 ν›„ 파일 수: {len(files)}개")
        if files:
            print(f"[파일 쑰회] 첫 번째 파일 λͺ¨λΈλͺ…: {files[0].model_name}")
        else:
            # Nothing matched: dump the model names that actually exist.
            all_files = UploadedFile.query.filter_by(parent_file_id=None).all()
            unique_models = set(f.model_name for f in all_files if f.model_name)
            print(f"[파일 쑰회] λ°μ΄ν„°λ² μ΄μŠ€μ— μ‘΄μž¬ν•˜λŠ” λͺ¨λΈλͺ… λͺ©λ‘: {list(unique_models)}")

        # Serialize one file row with its chunk count and Parent Chunk flag.
        def summarize(entry):
            info = entry.to_dict()
            info['chunk_count'] = DocumentChunk.query.filter_by(file_id=entry.id).count()
            info['has_parent_chunk'] = ParentChunk.query.filter_by(file_id=entry.id).first() is not None
            return info

        files_with_children = []
        for original in files:
            entry = summarize(original)
            children = UploadedFile.query.filter_by(parent_file_id=original.id).order_by(UploadedFile.uploaded_at.asc()).all()
            entry['child_files'] = [summarize(child) for child in children]
            files_with_children.append(entry)

        # Per-model aggregates (originals only; public-only when restricted).
        model_stats = {}
        if not model_name:
            if restrict_to_public:
                all_files = UploadedFile.query.filter_by(parent_file_id=None, is_public=True).all()
            else:
                all_files = UploadedFile.query.filter_by(parent_file_id=None).all()
            for entry in all_files:
                bucket = model_stats.setdefault(entry.model_name or 'λ―Έμ§€μ •', {'count': 0, 'total_size': 0})
                bucket['count'] += 1
                bucket['total_size'] += entry.file_size
        else:
            model_stats[model_name] = {
                'count': len(files),
                'total_size': sum(f.file_size for f in files)
            }

        print(f"[파일 쑰회] 쑰회된 원본 파일 수: {len(files)}개")
        return jsonify({
            'files': files_with_children,
            'model_stats': model_stats,
            'filtered_model': model_name
        }), 200
    except Exception as e:
        return jsonify({'error': f'파일 λͺ©λ‘ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/chunks', methods=['GET'])
@login_required
def get_file_chunks(file_id):
    """Report a file's chunk status (used to verify learning readiness)."""
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if file is None:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404

        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all()
        total_chunks = len(chunks)

        # Truncate long chunk text to a 100-char preview.
        def preview(text):
            return text[:100] + '...' if len(text) > 100 else text

        # Sample only the first three chunks.
        sample_chunks = [
            {
                'index': chunk.chunk_index,
                'content_preview': preview(chunk.content),
                'content_length': len(chunk.content)
            }
            for chunk in chunks[:3]
        ]

        ready = total_chunks > 0
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'model_name': file.model_name,
            'total_chunks': total_chunks,
            'sample_chunks': sample_chunks,
            'learning_status': 'ready' if ready else 'not_ready',
            'message': f'{total_chunks}개 청크가 μ €μž₯λ˜μ–΄ RAG 검색에 μ‚¬μš© κ°€λŠ₯ν•©λ‹ˆλ‹€.' if ready else '청크가 μƒμ„±λ˜μ§€ μ•Šμ•„ RAG 검색이 λΆˆκ°€λŠ₯ν•©λ‹ˆλ‹€.'
        }), 200
    except Exception as e:
        return jsonify({'error': f'청크 정보 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/chunks/all', methods=['GET'])
@login_required
def get_all_file_chunks(file_id):
    """List every chunk of a file with full content (admin view).

    Admins may inspect any file; regular users are limited to their own
    uploads.  Chunk metadata stored as JSON is parsed per chunk; unparsable
    metadata yields ``None`` rather than failing the whole request.
    """
    try:
        # Admins can access any file; others only files they uploaded.
        if current_user.is_admin:
            file = UploadedFile.query.get(file_id)
        else:
            file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404

        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all()
        chunks_data = []
        for chunk in chunks:
            chunk_dict = {
                'id': chunk.id,
                'chunk_index': chunk.chunk_index,
                'content': chunk.content,
                'content_length': len(chunk.content),
                'created_at': chunk.created_at.isoformat() if chunk.created_at else None,
                'metadata': None  # overwritten below if metadata parses
            }
            if chunk.chunk_metadata:
                try:
                    chunk_dict['metadata'] = json.loads(chunk.chunk_metadata)
                except (json.JSONDecodeError, TypeError) as e:
                    # BUG FIX: was a bare `except:` that silently swallowed every
                    # exception (including KeyboardInterrupt/SystemExit); catch
                    # only the parse failures json.loads can raise, and log them.
                    print(f"[청크 쑰회] 메타데이터 νŒŒμ‹± μ‹€νŒ¨ (chunk {chunk.id}): {str(e)}")
            chunks_data.append(chunk_dict)

        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'model_name': file.model_name,
            'total_chunks': len(chunks_data),
            'chunks': chunks_data
        }), 200
    except Exception as e:
        return jsonify({'error': f'청크 λͺ©λ‘ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/summary', methods=['GET'])
@login_required
def get_file_summary(file_id):
    """Return a file's summary data (Parent Chunk + Episode Analysis)."""
    try:
        print(f"[μš”μ•½ 쑰회] 파일 ID {file_id} μš”μ•½ λ‚΄μš© 쑰회 μš”μ²­ (μ‚¬μš©μž: {current_user.username})")
        # Any authenticated user may read any file's summary (same as the admin page).
        record = UploadedFile.query.get(file_id)
        if record is None:
            print(f"[μš”μ•½ 쑰회] νŒŒμΌμ„ 찾을 수 μ—†μŒ: 파일 ID {file_id}")
            # Debug aid: dump the file IDs that actually exist.
            all_files = UploadedFile.query.all()
            print(f"[μš”μ•½ 쑰회] λ°μ΄ν„°λ² μ΄μŠ€μ— μ‘΄μž¬ν•˜λŠ” 파일 ID λͺ©λ‘: {[f.id for f in all_files]}")
            return jsonify({'error': f'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. (파일 ID: {file_id})'}), 404

        parent = ParentChunk.query.filter_by(file_id=file_id).first()
        analysis = EpisodeAnalysis.query.filter_by(file_id=file_id).first()
        summary = {
            'file_id': file_id,
            'filename': record.original_filename,
            'parent_chunk': parent.to_dict() if parent else None,
            'episode_analysis': analysis.to_dict() if analysis else None,
            'has_parent_chunk': parent is not None,
            'has_episode_analysis': analysis is not None
        }
        return jsonify(summary), 200
    except Exception as e:
        return jsonify({'error': f'μš”μ•½ λ‚΄μš© 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/graph', methods=['GET'])
@login_required
def get_file_graph(file_id):
    """Return a file's GraphRAG data (entities, relationships, events),
    grouped per episode, plus aggregate statistics."""
    try:
        print(f"[GraphRAG 쑰회] 파일 ID {file_id} GraphRAG 데이터 쑰회 μš”μ²­ (μ‚¬μš©μž: {current_user.username})")
        file = UploadedFile.query.get(file_id)
        if file is None:
            print(f"[GraphRAG 쑰회] νŒŒμΌμ„ 찾을 수 μ—†μŒ: 파일 ID {file_id}")
            return jsonify({'error': f'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. (파일 ID: {file_id})'}), 404

        entities = GraphEntity.query.filter_by(file_id=file_id).all()
        relationships = GraphRelationship.query.filter_by(file_id=file_id).all()
        events = GraphEvent.query.filter_by(file_id=file_id).all()

        # Entities grouped per episode, split into characters / locations.
        entities_by_episode = {}
        for entity in entities:
            bucket = entities_by_episode.setdefault(entity.episode_title, {'characters': [], 'locations': []})
            if entity.entity_type == 'character':
                bucket['characters'].append(entity.to_dict())
            elif entity.entity_type == 'location':
                bucket['locations'].append(entity.to_dict())

        # Relationships grouped per episode.
        relationships_by_episode = {}
        for rel in relationships:
            relationships_by_episode.setdefault(rel.episode_title, []).append(rel.to_dict())

        # Events grouped per episode.
        events_by_episode = {}
        for event in events:
            events_by_episode.setdefault(event.episode_title, []).append(event.to_dict())

        # Distinct episode titles across all three collections.
        episode_titles = {e.episode_title for e in entities}
        episode_titles.update(r.episode_title for r in relationships)
        episode_titles.update(ev.episode_title for ev in events)
        episodes = list(episode_titles)

        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'statistics': {
                'total_entities': len(entities),
                'total_relationships': len(relationships),
                'total_events': len(events),
                'episodes_count': len(episodes)
            },
            'entities_by_episode': entities_by_episode,
            'relationships_by_episode': relationships_by_episode,
            'events_by_episode': events_by_episode,
            'episodes': sorted(episodes)
        }), 200
    except Exception as e:
        print(f"[GraphRAG 쑰회] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'GraphRAG 데이터 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/parent-chunk', methods=['GET'])
@login_required
def get_file_parent_chunk(file_id):
    """Return a file's Parent Chunk, if one has been generated."""
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if file is None:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404

        parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        payload = {
            'file_id': file_id,
            'filename': file.original_filename,
            'has_parent_chunk': parent_chunk is not None
        }
        if parent_chunk is None:
            payload['message'] = 'Parent Chunkκ°€ μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
        else:
            payload['parent_chunk'] = parent_chunk.to_dict()
            payload['message'] = 'Parent Chunkκ°€ μ‘΄μž¬ν•©λ‹ˆλ‹€.'
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'Parent Chunk 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/parent-chunk', methods=['POST'])
@login_required
def create_file_parent_chunk(file_id):
    """Manually (re)create the Parent Chunk for one of the current user's files.

    Validates ownership, the linked AI model, the file extension and the
    on-disk path, reads the text (UTF-8 with a CP949 fallback), then
    delegates generation to ``create_parent_chunk_with_ai``.
    """
    try:
        target = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if target is None:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        # A linked model is mandatory: that model performs the generation.
        if not target.model_name:
            return jsonify({'error': 'νŒŒμΌμ— μ—°κ²°λœ AI λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€. Parent Chunkλ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # Only plain-text sources are supported.
        if not target.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'Parent ChunkλŠ” ν…μŠ€νŠΈ 파일(.txt, .md)μ—λ§Œ 생성할 수 μžˆμŠ΅λ‹ˆλ‹€.'}), 400
        if not target.file_path or not os.path.exists(target.file_path):
            error_msg = f'파일 κ²½λ‘œκ°€ μœ νš¨ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€: {target.file_path}'
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
            return jsonify({'error': error_msg}), 500
        # Read the file, preferring UTF-8 and falling back to CP949 for
        # legacy Korean-encoded uploads.
        try:
            try:
                with open(target.file_path, 'r', encoding='utf-8') as fh:
                    content = fh.read()
            except UnicodeDecodeError:
                with open(target.file_path, 'r', encoding='cp949') as fh:
                    content = fh.read()
        except FileNotFoundError:
            error_msg = f'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {target.file_path}'
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
            return jsonify({'error': error_msg}), 500
        except PermissionError:
            error_msg = f'파일 읽기 κΆŒν•œμ΄ μ—†μŠ΅λ‹ˆλ‹€: {target.file_path}'
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
            return jsonify({'error': error_msg}), 500
        except Exception as e:
            error_msg = f'νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
            import traceback
            traceback.print_exc()
            return jsonify({'error': error_msg}), 500
        if not content or not content.strip():
            return jsonify({'error': '파일 λ‚΄μš©μ΄ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.'}), 400
        print(f"[Parent Chunk μˆ˜λ™ 생성] 파일 ID {file_id}에 λŒ€ν•œ Parent Chunk 생성 μ‹œμž‘")
        print(f"[Parent Chunk μˆ˜λ™ 생성] λͺ¨λΈλͺ…: {target.model_name}")
        print(f"[Parent Chunk μˆ˜λ™ 생성] 파일λͺ…: {target.original_filename}")
        generated = create_parent_chunk_with_ai(file_id, content, target.model_name)
        if not generated:
            # Generation failed inside the helper; details are in the server log.
            return jsonify({
                'error': 'Parent Chunk 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€. μ„œλ²„ 둜그λ₯Ό ν™•μΈν•˜μ„Έμš”.',
                'file_id': file_id,
                'filename': target.original_filename
            }), 500
        return jsonify({
            'file_id': file_id,
            'filename': target.original_filename,
            'has_parent_chunk': True,
            'parent_chunk': generated.to_dict(),
            'message': 'Parent Chunkκ°€ μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.'
        }), 200
    except Exception as e:
        import traceback
        error_traceback = traceback.format_exc()
        error_msg = str(e)
        print(f"[Parent Chunk 생성] ❌ μ˜ˆμ™Έ λ°œμƒ: {error_msg}")
        print(f"[Parent Chunk 생성] Traceback:\n{error_traceback}")
        return jsonify({
            'error': f'Parent Chunk 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {error_msg}',
            'file_id': file_id
        }), 500
@main_bp.route('/api/files/<int:file_id>/process/parent-chunk', methods=['POST'])
@login_required
def process_parent_chunk(file_id):
    """Pipeline step 1: create the Parent Chunk.

    Thin alias over the manual creation endpoint so the step-based
    processing UI can reuse the same handler.
    """
    return create_file_parent_chunk(file_id)
@main_bp.route('/api/files/<int:file_id>/process/chunks', methods=['POST'])
@login_required
def process_chunks(file_id):
    """Pipeline step 2: create chunks (episode analysis and graph extraction are skipped)."""
    try:
        record = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if record is None:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        if not record.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'ChunkλŠ” ν…μŠ€νŠΈ 파일(.txt, .md)μ—λ§Œ 생성할 수 μžˆμŠ΅λ‹ˆλ‹€.'}), 400
        # Read the source text: UTF-8 first, CP949 as a fallback for legacy files.
        try:
            try:
                with open(record.file_path, 'r', encoding='utf-8') as fh:
                    content = fh.read()
            except UnicodeDecodeError:
                with open(record.file_path, 'r', encoding='cp949') as fh:
                    content = fh.read()
        except Exception as e:
            return jsonify({'error': f'νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
        print(f"[단계 2: Chunk 생성] 파일 ID {file_id}에 λŒ€ν•œ Chunk 생성 μ‹œμž‘")
        # Only chunking happens here; later pipeline steps run the analyses.
        chunk_count = create_chunks_for_file(file_id, content, skip_episode_analysis=True, skip_graph_extraction=True)
        return jsonify({
            'file_id': file_id,
            'filename': record.original_filename,
            'chunk_count': chunk_count,
            'message': f'Chunk {chunk_count}κ°œκ°€ μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'step': 'chunks',
            'completed': True
        }), 200
    except Exception as e:
        return jsonify({'error': f'Chunk 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'step': 'chunks'}), 500
@main_bp.route('/api/files/<int:file_id>/process/episode-analysis', methods=['POST'])
@login_required
def process_episode_analysis(file_id):
    """Pipeline step 3: run the per-episode AI analysis for a text file.

    Splits the file into episode sections, replaces any previous analysis
    rows, stores one combined ``EpisodeAnalysis`` record, then automatically
    runs Graph Extraction for every episode.

    Returns:
        200 with per-step statistics on success; 4xx for validation errors;
        500 when analysis produced no result or an unexpected error occurred.
    """
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        if not file.model_name:
            return jsonify({'error': 'νŒŒμΌμ— μ—°κ²°λœ AI λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        if not file.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': '회차 뢄석은 ν…μŠ€νŠΈ 파일(.txt, .md)μ—λ§Œ κ°€λŠ₯ν•©λ‹ˆλ‹€.'}), 400
        # Read the file content (UTF-8 first, CP949 fallback for legacy files).
        try:
            encoding = 'utf-8'
            try:
                with open(file.file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                with open(file.file_path, 'r', encoding='cp949') as f:
                    content = f.read()
        except Exception as e:
            return jsonify({'error': f'νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
        # Split into sections and drop the synopsis ('μž‘ν’ˆμ„€λͺ…') section.
        sections = split_content_by_episodes(content)
        episode_sections = [s for s in sections if s[0] != 'μž‘ν’ˆμ„€λͺ…']
        if not episode_sections:
            return jsonify({'error': '뢄석할 νšŒμ°¨κ°€ μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # The Parent Chunk is optional context; analysis proceeds without it.
        parent_chunk = None
        try:
            parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        except Exception:
            # FIX: was a bare `except:` which also swallowed SystemExit /
            # KeyboardInterrupt; narrowed to Exception, keeping best-effort intent.
            pass
        # Delete previous analyses — results are replaced wholesale.
        existing_analyses = EpisodeAnalysis.query.filter_by(file_id=file_id).all()
        if existing_analyses:
            for analysis in existing_analyses:
                db.session.delete(analysis)
            db.session.commit()
        print(f"[단계 3: 회차 뢄석] 파일 ID {file_id}에 λŒ€ν•œ 회차 뢄석 μ‹œμž‘ ({len(episode_sections)}개 회차)")
        # Analyse each episode; individual failures are logged and skipped.
        all_analyses = []
        for section_type, section_title, section_content, section_metadata in episode_sections:
            try:
                print(f"[단계 3: 회차 뢄석] '{section_title}' 뢄석 쀑...")
                analysis_result = analyze_episode(
                    episode_content=section_content,
                    episode_title=section_title,
                    full_content=content,
                    parent_chunk=parent_chunk,
                    model_name=file.model_name
                )
                if analysis_result:
                    all_analyses.append(f"\n\n{analysis_result}")
                    print(f"[단계 3: 회차 뢄석] '{section_title}' 뢄석 μ™„λ£Œ")
            except Exception as e:
                print(f"[단계 3: 회차 뢄석] '{section_title}' 뢄석 쀑 였λ₯˜: {str(e)}")
                continue
        # Persist all episode analyses as one combined record.
        if all_analyses:
            combined_analysis = "\n".join(all_analyses).strip()
            episode_analysis = EpisodeAnalysis(
                file_id=file_id,
                episode_title="전체 회차 톡합 뢄석",
                analysis_content=combined_analysis
            )
            db.session.add(episode_analysis)
            db.session.commit()
            # After a successful analysis, automatically run Graph Extraction.
            print(f"[단계 3: 회차 뢄석] Graph Extraction μžλ™ μ‹€ν–‰ μ‹œμž‘...")
            graph_success_count = 0
            for section_type, section_title, section_content, section_metadata in episode_sections:
                try:
                    print(f"[단계 3: 회차 뢄석] '{section_title}' Graph Extraction 쀑...")
                    success = extract_graph_from_episode(
                        episode_content=section_content,
                        episode_title=section_title,
                        file_id=file_id,
                        full_content=content,
                        parent_chunk=parent_chunk,
                        model_name=file.model_name
                    )
                    if success:
                        graph_success_count += 1
                        print(f"[단계 3: 회차 뢄석] '{section_title}' Graph Extraction μ™„λ£Œ")
                except Exception as e:
                    print(f"[단계 3: 회차 뢄석] '{section_title}' Graph Extraction 쀑 였λ₯˜: {str(e)}")
                    continue
            print(f"[단계 3: 회차 뢄석] Graph Extraction μ™„λ£Œ: {graph_success_count}/{len(episode_sections)}개 회차 성곡")
            return jsonify({
                'file_id': file_id,
                'filename': file.original_filename,
                'episode_count': len(episode_sections),
                'graph_success_count': graph_success_count,
                'message': f'{len(episode_sections)}개 회차 뢄석이 μ™„λ£Œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (Graph Extraction: {graph_success_count}/{len(episode_sections)}개 성곡)',
                'step': 'episode-analysis',
                'completed': True
            }), 200
        else:
            return jsonify({
                'error': '회차 뢄석 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.',
                'step': 'episode-analysis',
                'completed': False
            }), 500
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'회차 뢄석 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'step': 'episode-analysis'}), 500
@main_bp.route('/api/files/<int:file_id>/process/graph', methods=['POST'])
@login_required
def process_graph(file_id):
    """Pipeline step 4: run Graph Extraction for every episode of a text file.

    Splits the file into episode sections (excluding the synopsis section)
    and extracts graph entities/relationships/events per episode.

    Returns:
        200 with success statistics; 4xx for validation errors; 500 on
        unexpected failure.
    """
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        if not file.model_name:
            return jsonify({'error': 'νŒŒμΌμ— μ—°κ²°λœ AI λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        if not file.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'Graph Extraction은 ν…μŠ€νŠΈ 파일(.txt, .md)μ—λ§Œ κ°€λŠ₯ν•©λ‹ˆλ‹€.'}), 400
        # Read the file content (UTF-8 first, CP949 fallback for legacy files).
        try:
            encoding = 'utf-8'
            try:
                with open(file.file_path, 'r', encoding=encoding) as f:
                    content = f.read()
            except UnicodeDecodeError:
                with open(file.file_path, 'r', encoding='cp949') as f:
                    content = f.read()
        except Exception as e:
            return jsonify({'error': f'νŒŒμΌμ„ 읽을 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
        # Split into sections and drop the synopsis ('μž‘ν’ˆμ„€λͺ…') section.
        sections = split_content_by_episodes(content)
        episode_sections = [s for s in sections if s[0] != 'μž‘ν’ˆμ„€λͺ…']
        if not episode_sections:
            return jsonify({'error': 'Graph Extractionν•  νšŒμ°¨κ°€ μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # The Parent Chunk is optional context; extraction proceeds without it.
        parent_chunk = None
        try:
            parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        except Exception:
            # FIX: was a bare `except:` which also swallowed SystemExit /
            # KeyboardInterrupt; narrowed to Exception, keeping best-effort intent.
            pass
        print(f"[단계 4: Graph Extraction] 파일 ID {file_id}에 λŒ€ν•œ Graph Extraction μ‹œμž‘ ({len(episode_sections)}개 회차)")
        # Extract per episode; individual failures are logged and skipped.
        success_count = 0
        for section_type, section_title, section_content, section_metadata in episode_sections:
            try:
                print(f"[단계 4: Graph Extraction] '{section_title}' Graph Extraction 쀑...")
                success = extract_graph_from_episode(
                    episode_content=section_content,
                    episode_title=section_title,
                    file_id=file_id,
                    full_content=content,
                    parent_chunk=parent_chunk,
                    model_name=file.model_name
                )
                if success:
                    success_count += 1
                    print(f"[단계 4: Graph Extraction] '{section_title}' Graph Extraction μ™„λ£Œ")
            except Exception as e:
                print(f"[단계 4: Graph Extraction] '{section_title}' Graph Extraction 쀑 였λ₯˜: {str(e)}")
                continue
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'episode_count': len(episode_sections),
            'success_count': success_count,
            'message': f'{success_count}/{len(episode_sections)}개 회차 Graph Extraction이 μ™„λ£Œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'step': 'graph',
            'completed': True
        }), 200
    except Exception as e:
        return jsonify({'error': f'Graph Extraction 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'step': 'graph'}), 500
@main_bp.route('/api/files/<int:file_id>/metadata', methods=['POST'])
@login_required
def create_file_metadata(file_id):
    """Manually (re)generate AI metadata for every chunk of a text file.

    For each ``DocumentChunk`` the new extraction is merged into any
    previously stored metadata: existing fields are kept, new fields win,
    and list-valued fields are unioned without duplicates (the ``chapter``
    field set at upload time is therefore preserved).

    Returns:
        200 with success/fail counts; 4xx for permission/validation errors;
        500 on unexpected failure (session is rolled back).
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        # Permission: admins or the uploader only.
        if not current_user.is_admin and file.uploaded_by != current_user.id:
            return jsonify({'error': 'κΆŒν•œμ΄ μ—†μŠ΅λ‹ˆλ‹€.'}), 403
        # A linked model is mandatory: that model performs the extraction.
        if not file.model_name:
            return jsonify({'error': 'νŒŒμΌμ— μ—°κ²°λœ AI λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€. 메타데이터λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # Text files only.
        if not file.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'λ©”νƒ€λ°μ΄ν„°λŠ” ν…μŠ€νŠΈ 파일(.txt, .md)μ—λ§Œ 생성할 수 μžˆμŠ΅λ‹ˆλ‹€.'}), 400
        # Read the file content (UTF-8 first, CP949 fallback for legacy files).
        encoding = 'utf-8'
        try:
            with open(file.file_path, 'r', encoding=encoding) as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(file.file_path, 'r', encoding='cp949') as f:
                content = f.read()
        # Fetch every chunk in document order.
        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index).all()
        if not chunks:
            return jsonify({'error': '청크가 μ—†μŠ΅λ‹ˆλ‹€. λ¨Όμ € νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ„Έμš”.'}), 400
        print(f"[메타데이터 생성] 파일 ID {file_id}에 λŒ€ν•œ 메타데이터 생성 μ‹œμž‘")
        print(f"[메타데이터 생성] λͺ¨λΈλͺ…: {file.model_name}")
        print(f"[메타데이터 생성] 파일λͺ…: {file.original_filename}")
        print(f"[메타데이터 생성] 청크 개수: {len(chunks)}개")
        # Generate metadata per chunk; failures are counted and skipped.
        success_count = 0
        fail_count = 0
        for chunk in chunks:
            try:
                # Parse previously stored metadata; corrupt JSON resets to {}.
                existing_metadata = {}
                if chunk.chunk_metadata:
                    try:
                        existing_metadata = json.loads(chunk.chunk_metadata)
                    except (json.JSONDecodeError, TypeError):
                        # FIX: was a bare `except:`; narrowed to the decode
                        # failures this fallback is meant to absorb.
                        existing_metadata = {}
                # Extract fresh metadata (full original text given as context).
                new_metadata = extract_chunk_metadata(
                    chunk_content=chunk.content,
                    full_content=content,
                    chunk_index=chunk.chunk_index,
                    file_id=file_id,
                    model_name=file.model_name
                )
                # Merge: keep existing fields, newly extracted values win;
                # list-valued fields are unioned without duplicates.
                merged_metadata = existing_metadata.copy()
                for key, value in new_metadata.items():
                    if value is not None and value != []:
                        if isinstance(value, list) and isinstance(merged_metadata.get(key), list):
                            merged_list = merged_metadata.get(key, []).copy()
                            for item in value:
                                if item not in merged_list:
                                    merged_list.append(item)
                            merged_metadata[key] = merged_list
                        else:
                            merged_metadata[key] = value
                # Store as a JSON string (None when nothing was extracted).
                metadata_json = json.dumps(merged_metadata, ensure_ascii=False) if merged_metadata else None
                chunk.chunk_metadata = metadata_json
                success_count += 1
                # Progress log every 10 processed chunks.
                if (success_count + fail_count) % 10 == 0:
                    print(f"[메타데이터 생성] μ§„ν–‰ 쀑: {success_count + fail_count}/{len(chunks)}개 청크 처리 쀑...")
            except Exception as e:
                print(f"[메타데이터 생성] κ²½κ³ : 청크 {chunk.chunk_index} 메타데이터 생성 μ‹€νŒ¨: {str(e)}")
                fail_count += 1
                continue
        # One commit for all chunk updates.
        db.session.commit()
        print(f"[메타데이터 생성] μ™„λ£Œ: {success_count}개 성곡, {fail_count}개 μ‹€νŒ¨")
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'total_chunks': len(chunks),
            'success_count': success_count,
            'fail_count': fail_count,
            'message': f'메타데이터 생성이 μ™„λ£Œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (성곡: {success_count}개, μ‹€νŒ¨: {fail_count}개)'
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[메타데이터 생성] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'메타데이터 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>', methods=['DELETE'])
@login_required
def delete_file(file_id):
    """Delete an uploaded file together with every related record.

    Deleting an original file also removes its continuation uploads
    (children); deleting a continuation removes the original and all of
    its children. For each file this cascades over the on-disk file,
    DocumentChunks (DB and vector store), the ParentChunk, EpisodeAnalysis
    rows and the GraphRAG tables, then commits once at the end.

    NOTE(review): per-file failures inside the loop are logged and skipped,
    and the final commit still runs — a partially-deleted set can be
    committed. Intentional best-effort behavior, presumably; confirm.
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        # Decide the full set of files to remove: an original file
        # (parent_file_id is None) drags its continuation uploads along;
        # a continuation drags the original and all siblings along.
        files_to_delete = []
        if file.parent_file_id is None:
            # Original file: also delete every continuation upload.
            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
            files_to_delete = [file] + child_files
            print(f"[파일 μ‚­μ œ] 원본 파일 μ‚­μ œ: {file.original_filename}, μ—°κ΄€ 파일 {len(child_files)}κ°œλ„ ν•¨κ»˜ μ‚­μ œ")
        else:
            # Continuation upload: delete the original file as well.
            parent_file = UploadedFile.query.get(file.parent_file_id)
            if parent_file:
                # Original plus all of its continuation uploads.
                all_child_files = UploadedFile.query.filter_by(parent_file_id=file.parent_file_id).all()
                files_to_delete = [parent_file] + all_child_files
                print(f"[파일 μ‚­μ œ] μ΄μ–΄μ„œ μ—…λ‘œλ“œλœ 파일 μ‚­μ œ: {file.original_filename}, 원본 및 μ—°κ΄€ 파일 {len(all_child_files)}κ°œλ„ ν•¨κ»˜ μ‚­μ œ")
            else:
                # Orphaned continuation: the parent row no longer exists.
                files_to_delete = [file]
        deleted_count = 0
        deleted_files = []
        for file_to_delete in files_to_delete:
            try:
                # Remove the file from disk first (skip if already gone).
                if os.path.exists(file_to_delete.file_path):
                    os.remove(file_to_delete.file_path)
                    print(f"[파일 μ‚­μ œ] 파일 μ‹œμŠ€ν…œμ—μ„œ μ‚­μ œ: {file_to_delete.file_path}")
                # Delete related Child Chunks (DocumentChunk rows).
                child_chunk_count = DocumentChunk.query.filter_by(file_id=file_to_delete.id).count()
                if child_chunk_count > 0:
                    DocumentChunk.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] Child Chunk {child_chunk_count}개 μ‚­μ œ μ™„λ£Œ")
                # Also delete this file's chunks from the vector DB
                # (best-effort: vector-store failures are logged and ignored).
                try:
                    vector_db = get_vector_db()
                    vector_db.delete_chunks_by_file_id(file_to_delete.id)
                    print(f"[파일 μ‚­μ œ] 벑터 DBμ—μ„œ 청크 μ‚­μ œ μ™„λ£Œ")
                except Exception as vector_e:
                    print(f"[파일 μ‚­μ œ] 벑터 DB μ‚­μ œ 였λ₯˜ (λ¬΄μ‹œ): {str(vector_e)}")
                # Delete the related Parent Chunk, if any.
                parent_chunk = ParentChunk.query.filter_by(file_id=file_to_delete.id).first()
                if parent_chunk:
                    db.session.delete(parent_chunk)
                    print(f"[파일 μ‚­μ œ] Parent Chunk μ‚­μ œ μ™„λ£Œ")
                # Delete related EpisodeAnalysis rows.
                episode_analysis_count = EpisodeAnalysis.query.filter_by(file_id=file_to_delete.id).count()
                if episode_analysis_count > 0:
                    EpisodeAnalysis.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] EpisodeAnalysis {episode_analysis_count}개 μ‚­μ œ μ™„λ£Œ")
                # Delete related GraphRAG data (entities, relationships, events).
                graph_entity_count = GraphEntity.query.filter_by(file_id=file_to_delete.id).count()
                if graph_entity_count > 0:
                    GraphEntity.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] GraphEntity {graph_entity_count}개 μ‚­μ œ μ™„λ£Œ")
                graph_relationship_count = GraphRelationship.query.filter_by(file_id=file_to_delete.id).count()
                if graph_relationship_count > 0:
                    GraphRelationship.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] GraphRelationship {graph_relationship_count}개 μ‚­μ œ μ™„λ£Œ")
                graph_event_count = GraphEvent.query.filter_by(file_id=file_to_delete.id).count()
                if graph_event_count > 0:
                    GraphEvent.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] GraphEvent {graph_event_count}개 μ‚­μ œ μ™„λ£Œ")
                # Finally delete the UploadedFile row itself.
                deleted_files.append(file_to_delete.original_filename)
                db.session.delete(file_to_delete)
                deleted_count += 1
                print(f"[파일 μ‚­μ œ] λ°μ΄ν„°λ² μ΄μŠ€μ—μ„œ 파일 μ‚­μ œ μ™„λ£Œ: {file_to_delete.original_filename}")
            except Exception as e:
                # Best-effort: log the failure for this file and continue.
                print(f"[파일 μ‚­μ œ 였λ₯˜] {file_to_delete.original_filename}: {str(e)}")
                import traceback
                traceback.print_exc()
        # Single commit for everything deleted above.
        db.session.commit()
        message = f'파일이 μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'
        if deleted_count > 1:
            message = f'파일 {deleted_count}κ°œκ°€ μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (원본 및 μ—°κ΄€ 파일 포함)'
        return jsonify({
            'message': message,
            'deleted_count': deleted_count,
            'deleted_files': deleted_files
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'파일 μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/public', methods=['PUT'])
@login_required
@admin_required
def toggle_file_public(file_id):
    """Set a file's public/private flag (admin only)."""
    try:
        target = UploadedFile.query.get_or_404(file_id)
        payload = request.get_json()
        new_state = payload.get('is_public', False)
        target.is_public = new_state
        db.session.commit()
        status_label = "곡개" if new_state else "λΉ„κ³΅κ°œ"
        return jsonify({
            'message': f'파일이 {status_label}둜 μ„€μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'file': target.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'파일 곡개 μ—¬λΆ€ λ³€κ²½ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/content', methods=['GET'])
@login_required
def get_file_content(file_id):
    """Return the raw text content of an uploaded file."""
    try:
        record = UploadedFile.query.get_or_404(file_id)
        if not os.path.exists(record.file_path):
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        # Decode as UTF-8; fall back to CP949 for legacy Korean encodings.
        try:
            with open(record.file_path, 'r', encoding='utf-8') as fh:
                text = fh.read()
        except UnicodeDecodeError:
            with open(record.file_path, 'r', encoding='cp949') as fh:
                text = fh.read()
        return jsonify({
            'content': text,
            'filename': record.original_filename
        }), 200
    except Exception as e:
        return jsonify({'error': f'파일 λ‚΄μš© 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions', methods=['GET'])
@login_required
def get_chat_sessions():
    """List the current user's chat sessions (20 most recently updated)."""
    try:
        recent = (ChatSession.query
                  .filter_by(user_id=current_user.id)
                  .order_by(ChatSession.updated_at.desc())
                  .limit(20)
                  .all())
        return jsonify({'sessions': [s.to_dict() for s in recent]}), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions', methods=['POST'])
@login_required
def create_chat_session():
    """Create a new chat session for the current user."""
    try:
        payload = request.json
        new_session = ChatSession(
            user_id=current_user.id,
            title=payload.get('title', 'μƒˆ λŒ€ν™”'),
            model_name=payload.get('model_name', None),  # kept for backward compatibility
            analysis_model=payload.get('analysis_model', None),
            answer_model=payload.get('answer_model', None)
        )
        db.session.add(new_session)
        db.session.commit()
        return jsonify({
            'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'session': new_session.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['GET'])
@login_required
def get_chat_session(session_id):
    """Return one chat session with its full message history."""
    try:
        chat = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        detail = chat.to_dict()
        detail['messages'] = [m.to_dict() for m in chat.messages]
        return jsonify({'session': detail}), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['PUT'])
@login_required
def update_chat_session(session_id):
    """Update a chat session's editable fields (currently the title)."""
    try:
        chat = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        payload = request.json
        if 'title' in payload:
            chat.title = payload['title']
        # Touch the timestamp so the session sorts to the top of the list.
        chat.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μˆ˜μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'session': chat.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ μˆ˜μ • 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['DELETE'])
@login_required
def delete_chat_session(session_id):
    """Delete one of the current user's chat sessions."""
    try:
        chat = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        db.session.delete(chat)
        db.session.commit()
        return jsonify({'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>/messages', methods=['POST'])
@login_required
def add_chat_message(session_id):
    """Append a message to a chat session.

    The first user message also names an untitled session (truncated to
    30 characters), and every message bumps ``updated_at``.
    """
    try:
        chat = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        payload = request.json
        role = payload.get('role', 'user')
        content = payload.get('content', '')
        if not content:
            return jsonify({'error': 'λ©”μ‹œμ§€ λ‚΄μš©μ΄ ν•„μš”ν•©λ‹ˆλ‹€.'}), 400
        new_message = ChatMessage(
            session_id=session_id,
            role=role,
            content=content
        )
        db.session.add(new_message)
        # Title an untitled session from its first user message.
        if (not chat.title or chat.title == 'μƒˆ λŒ€ν™”') and role == 'user':
            chat.title = content[:30] + '...' if len(content) > 30 else content
        chat.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λ©”μ‹œμ§€κ°€ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'chat_message': new_message.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λ©”μ‹œμ§€ μΆ”κ°€ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500