# soyailabs / app/routes.py — SOY NV AI
# Gemini API integration with REST API support, improved error handling,
# and markdown bold formatting for messages (commit 665bcdc).
from flask import Blueprint, render_template, request, jsonify, send_from_directory, redirect, url_for, flash
from flask_login import login_user, logout_user, login_required, current_user
from werkzeug.utils import secure_filename
from app.database import db, UploadedFile, User, ChatSession, ChatMessage, DocumentChunk, ParentChunk, SystemConfig
from app.vector_db import get_vector_db
from app.gemini_client import get_gemini_client
import requests
import os
from datetime import datetime
import uuid
import re
import json
main_bp = Blueprint('main', __name__)


def admin_required(f):
    """Decorator restricting a view to administrator accounts.

    Login is enforced first via ``login_required``. Non-admin callers
    receive a JSON 403 on API paths, and a flash message plus a redirect
    to the index page everywhere else.
    """
    from functools import wraps

    @wraps(f)
    @login_required
    def wrapper(*args, **kwargs):
        if current_user.is_admin:
            return f(*args, **kwargs)
        # API clients expect a JSON error payload rather than an HTML redirect.
        if request.path.startswith('/api/'):
            return jsonify({'error': 'κ΄€λ¦¬μž κΆŒν•œμ΄ ν•„μš”ν•©λ‹ˆλ‹€.'}), 403
        flash('κ΄€λ¦¬μž κΆŒν•œμ΄ ν•„μš”ν•©λ‹ˆλ‹€.', 'error')
        return redirect(url_for('main.index'))
    return wrapper
# Ollama base URL (overridable via the OLLAMA_BASE_URL environment variable)
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')

# Upload settings: target folder and the whitelisted file extensions
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'uploads')
ALLOWED_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'epub'}

# Emit the resolved upload path at import time (debugging aid)
print(f"[μ—…λ‘œλ“œ μ„€μ •] μ—…λ‘œλ“œ 폴더 경둜: {UPLOAD_FOLDER}")
print(f"[μ—…λ‘œλ“œ μ„€μ •] μ—…λ‘œλ“œ 폴더 쑴재 μ—¬λΆ€: {os.path.exists(UPLOAD_FOLDER)}")


def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[-1].lower()
    return extension in ALLOWED_EXTENSIONS
def ensure_upload_folder():
    """Create the upload folder if missing and verify it is writable.

    Raises:
        Exception: when the folder cannot be created or is not writable;
            the error is printed (with traceback) before re-raising.
    """
    try:
        if not os.path.exists(UPLOAD_FOLDER):
            print(f"μ—…λ‘œλ“œ 폴더 생성 쀑: {UPLOAD_FOLDER}")
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            if not os.path.exists(UPLOAD_FOLDER):
                raise Exception(f'μ—…λ‘œλ“œ 폴더λ₯Ό 생성할 수 μ—†μŠ΅λ‹ˆλ‹€: {UPLOAD_FOLDER}')
        # Probe write permission by creating and removing a throwaway file.
        test_file = os.path.join(UPLOAD_FOLDER, '.write_test')
        try:
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"μ—…λ‘œλ“œ 폴더 μ“°κΈ° κΆŒν•œ 확인 μ™„λ£Œ: {UPLOAD_FOLDER}")
        except PermissionError as e:
            raise Exception(f'μ—…λ‘œλ“œ 폴더에 μ“°κΈ° κΆŒν•œμ΄ μ—†μŠ΅λ‹ˆλ‹€: {UPLOAD_FOLDER} - {str(e)}')
        except Exception as e:
            raise Exception(f'μ—…λ‘œλ“œ 폴더 μ“°κΈ° ν…ŒμŠ€νŠΈ μ‹€νŒ¨: {UPLOAD_FOLDER} - {str(e)}')
    except Exception as e:
        # Log and re-raise so callers can surface the failure to the client.
        print(f"μ—…λ‘œλ“œ 폴더 생성 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def split_text_into_chunks(text, min_chunk_size=200, max_chunk_size=1000, overlap=150):
    """Semantic text chunking that respects sentence and paragraph boundaries.

    Args:
        text: raw document text.
        min_chunk_size: chunks shorter than this are merged into a neighbor.
        max_chunk_size: soft upper bound; a single sentence longer than this
            is still kept whole.
        overlap: approximate number of trailing characters carried over into
            the next chunk for context continuity.

    Returns:
        List of chunk strings; ``[text]`` if sentence splitting fails,
        ``[]`` for empty/whitespace input.
    """
    if not text or len(text.strip()) == 0:
        return []
    # Stage 1: split into paragraphs on blank lines.
    paragraphs = re.split(r'\n\s*\n', text.strip())
    paragraphs = [p.strip() for p in paragraphs if p.strip()]
    if not paragraphs:
        return []
    # Stage 2: split each paragraph into sentences.
    # Terminators: . ! ? followed by whitespace or end-of-paragraph
    # (works for both Korean and English text).
    sentence_pattern = r'([.!?]+)(?=\s+|$)'
    all_sentences = []
    for para in paragraphs:
        # re.split with a capturing group keeps the punctuation as its own part.
        parts = re.split(sentence_pattern, para)
        combined_sentences = []
        current_sentence = ""
        for i, part in enumerate(parts):
            if part.strip():
                if re.match(r'^[.!?]+$', part):
                    # Punctuation part: close out the current sentence.
                    current_sentence += part
                    if current_sentence.strip():
                        combined_sentences.append(current_sentence.strip())
                    current_sentence = ""
                else:
                    # Text part: accumulate into the current sentence.
                    current_sentence += part
        # Trailing text without terminal punctuation still counts as a sentence.
        if current_sentence.strip():
            combined_sentences.append(current_sentence.strip())
        # Paragraph with no punctuation at all: keep it whole.
        if not combined_sentences and para.strip():
            combined_sentences.append(para.strip())
        all_sentences.extend(combined_sentences)
    if not all_sentences:
        # Sentence splitting failed entirely; return the original text as-is.
        return [text] if text.strip() else []
    # Stage 3: pack sentences into chunks up to max_chunk_size.
    chunks = []
    current_chunk = []
    current_size = 0
    for sentence in all_sentences:
        sentence_size = len(sentence)
        # Adding this sentence would exceed the cap -> flush the current chunk.
        if current_size + sentence_size > max_chunk_size and current_chunk:
            chunk_text = '\n'.join(current_chunk)
            if len(chunk_text.strip()) >= min_chunk_size:
                chunks.append(chunk_text)
            else:
                # Undersized chunk: merge into the previous one when possible.
                if chunks:
                    chunks[-1] = chunks[-1] + '\n' + chunk_text
                else:
                    chunks.append(chunk_text)
            # Carry the last few sentences (up to `overlap` chars) into the
            # next chunk to preserve context across the boundary.
            overlap_sentences = []
            overlap_size = 0
            for s in reversed(current_chunk):
                if overlap_size + len(s) <= overlap:
                    overlap_sentences.insert(0, s)
                    overlap_size += len(s) + 1  # +1 accounts for the newline
                else:
                    break
            current_chunk = overlap_sentences + [sentence]
            current_size = overlap_size + sentence_size
        else:
            current_chunk.append(sentence)
            current_size += sentence_size + 1  # +1 accounts for the newline
    # Flush the final chunk; merge backwards if it is undersized.
    if current_chunk:
        chunk_text = '\n'.join(current_chunk)
        if chunks and len(chunk_text.strip()) < min_chunk_size:
            chunks[-1] = chunks[-1] + '\n' + chunk_text
        else:
            chunks.append(chunk_text)
    # Final pass: drop empties and merge any remaining undersized chunks.
    final_chunks = []
    for chunk in chunks:
        chunk = chunk.strip()
        if chunk and len(chunk) >= min_chunk_size:
            final_chunks.append(chunk)
        elif chunk:
            if final_chunks:
                final_chunks[-1] = final_chunks[-1] + '\n' + chunk
            else:
                final_chunks.append(chunk)
    return final_chunks if final_chunks else [text] if text.strip() else []
def create_chunks_for_file(file_id, content):
    """Split file content into semantic chunks and persist them (SQL DB + vector DB).

    Existing chunks for *file_id* are deleted first so re-uploads replace
    rather than duplicate. Each chunk is flushed to obtain its DB id before
    being inserted into the vector store.

    Args:
        file_id: id of the UploadedFile row the chunks belong to.
        content: full text of the file.

    Returns:
        Number of chunks saved to the database (0 on failure or empty input).
    """
    try:
        print(f"[청크 생성] 파일 ID {file_id}에 λŒ€ν•œ 청크 생성 μ‹œμž‘")
        print(f"[청크 생성] 원본 ν…μŠ€νŠΈ 길이: {len(content)}자")
        vector_db = get_vector_db()
        # Delete any previous chunks (both stores) before regenerating.
        existing_chunks = DocumentChunk.query.filter_by(file_id=file_id).all()
        if existing_chunks:
            print(f"[청크 생성] κΈ°μ‘΄ 청크 {len(existing_chunks)}개 μ‚­μ œ 쀑...")
            vector_db.delete_chunks_by_file_id(file_id)
            DocumentChunk.query.filter_by(file_id=file_id).delete()
            db.session.commit()
        # Semantic chunking: min 200 / max 1000 chars per chunk, 150-char overlap.
        chunks = split_text_into_chunks(content, min_chunk_size=200, max_chunk_size=1000, overlap=150)
        print(f"[청크 생성] λΆ„ν• λœ 청크 수: {len(chunks)}개")
        if len(chunks) == 0:
            print(f"[청크 생성] κ²½κ³ : 청크가 μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. ν…μŠ€νŠΈκ°€ λ„ˆλ¬΄ μ§§κ±°λ‚˜ λΉ„μ–΄μžˆμ„ 수 μžˆμŠ΅λ‹ˆλ‹€.")
            return 0
        saved_count = 0
        vector_saved_count = 0
        for idx, chunk_content in enumerate(chunks):
            try:
                chunk = DocumentChunk(
                    file_id=file_id,
                    chunk_index=idx,
                    content=chunk_content
                )
                db.session.add(chunk)
                db.session.flush()  # flush so chunk.id is assigned before vector insert
                if vector_db.add_chunk(
                    chunk_id=chunk.id,
                    chunk_content=chunk_content,
                    file_id=file_id,
                    chunk_index=idx
                ):
                    vector_saved_count += 1
                saved_count += 1
                # Progress log every 10 chunks.
                if (idx + 1) % 10 == 0:
                    print(f"[청크 생성] μ§„ν–‰ 쀑: {idx + 1}/{len(chunks)}개 청크 μ €μž₯ 쀑... (DB: {saved_count}, 벑터 DB: {vector_saved_count})")
            except Exception as e:
                # Best-effort: a single failing chunk does not abort the batch.
                print(f"[청크 생성] κ²½κ³ : 청크 {idx} μ €μž₯ 쀑 였λ₯˜: {str(e)}")
                continue
        db.session.commit()
        print(f"[청크 생성] μ™„λ£Œ: {saved_count}개 청크가 λ°μ΄ν„°λ² μ΄μŠ€μ— μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€. (벑터 DB: {vector_saved_count}개)")
        # Sanity check: re-count rows to confirm the commit landed.
        verified_count = DocumentChunk.query.filter_by(file_id=file_id).count()
        if verified_count != saved_count:
            print(f"[청크 생성] κ²½κ³ : μ €μž₯된 청크 수({saved_count})와 ν™•μΈλœ 청크 수({verified_count})κ°€ μΌμΉ˜ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
        else:
            print(f"[청크 생성] 검증 μ™„λ£Œ: {verified_count}개 청크가 μ •μƒμ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        return saved_count
    except Exception as e:
        db.session.rollback()
        print(f"[청크 생성] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return 0
def _build_analysis_prompt(content_preview):
    """Build the Korean web-novel analysis prompt for *content_preview*."""
    return f"""λ‹€μŒ μ›Ήμ†Œμ„€ ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜μ—¬ λ‹€μŒ ν•­λͺ©λ“€μ„ μž‘μ„±ν•΄μ£Όμ„Έμš”. 각 ν•­λͺ©μ€ λͺ…ν™•ν•˜κ³  ꡬ체적으둜 μž‘μ„±ν•΄μ£Όμ„Έμš”.
ν…μŠ€νŠΈ λ‚΄μš©:
{content_preview}
μœ„ ν…μŠ€νŠΈλ₯Ό λΆ„μ„ν•˜μ—¬ λ‹€μŒ ν˜•μ‹μœΌλ‘œ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
## 세계관 μ„€λͺ…
[세계관에 λŒ€ν•œ μƒμ„Έν•œ μ„€λͺ…을 μž‘μ„±ν•˜μ„Έμš”. λ°°κ²½, μ„€μ •, κ·œμΉ™ 등을 ν¬ν•¨ν•˜μ„Έμš”.]
## μ£Όμš” 캐릭터 뢄석
[μ£Όμš” λ“±μž₯μΈλ¬Όλ“€μ˜ 이름, μ—­ν• , 성격, νŠΉμ§• 등을 λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”. 각 μΊλ¦­ν„°λ³„λ‘œ κ΅¬λΆ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”.]
## μ£Όμš” μŠ€ν† λ¦¬ 뢄석
[전체적인 μŠ€ν† λ¦¬ 흐름, μ£Όμš” 사건, κ°ˆλ“± ꡬ쑰 등을 λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”.]
## μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석
[μ€‘μš”ν•œ μ—ν”Όμ†Œλ“œλ‚˜ 챕터별 μ£Όμš” λ‚΄μš©μ„ λΆ„μ„ν•˜μ—¬ μž‘μ„±ν•˜μ„Έμš”. μ‹œκ°„ μˆœμ„œλŒ€λ‘œ μ •λ¦¬ν•˜λ©΄ μ’‹μŠ΅λ‹ˆλ‹€.]
## 기타
[μœ„ μΉ΄ν…Œκ³ λ¦¬μ— ν¬ν•¨λ˜μ§€ μ•Šμ§€λ§Œ μ€‘μš”ν•œ μ •λ³΄λ‚˜ νŠΉμ§• 등을 μž‘μ„±ν•˜μ„Έμš”.]
각 ν•­λͺ©μ„ λͺ…ν™•ν•˜κ²Œ κ΅¬λΆ„ν•˜μ—¬ μž‘μ„±ν•΄μ£Όμ„Έμš”."""


def _request_gemini_analysis(model_name, analysis_prompt):
    """Send the analysis prompt to the Gemini API.

    Returns the response text, or None when the key is missing, the call
    fails, or the response is empty (details are logged).
    """
    # Strip an optional "gemini:" prefix (case-insensitive); bare
    # "gemini-..." model names are used as-is.
    gemini_model_name = model_name.strip()
    if gemini_model_name.lower().startswith('gemini:'):
        gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
    print(f"[Parent Chunk 생성] Gemini API에 뢄석 μš”μ²­ 전솑 쀑... (λͺ¨λΈ: {gemini_model_name})")
    print(f"[Parent Chunk 생성] 원본 λͺ¨λΈλͺ…: {model_name} -> Gemini λͺ¨λΈλͺ…: {gemini_model_name}")
    gemini_client = get_gemini_client()
    if not gemini_client.is_configured():
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
        print(f"[Parent Chunk 생성] 디버그: Gemini ν΄λΌμ΄μ–ΈνŠΈ μƒνƒœ 확인 쀑...")
        # Re-check the stored key to produce a more specific diagnostic.
        from app.gemini_client import get_gemini_api_key
        api_key = get_gemini_api_key()
        if api_key:
            print(f"[Parent Chunk 생성] 디버그: API ν‚€λŠ” μ‘΄μž¬ν•˜μ§€λ§Œ ν΄λΌμ΄μ–ΈνŠΈκ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. (길이: {len(api_key)})")
        else:
            print(f"[Parent Chunk 생성] 디버그: API ν‚€κ°€ λ°μ΄ν„°λ² μ΄μŠ€μ— μ—†μŠ΅λ‹ˆλ‹€.")
        return None
    print(f"[Parent Chunk 생성] Gemini API ν‚€ 확인 μ™„λ£Œ. API 호좜 μ‹œμž‘...")
    result = gemini_client.generate_response(
        prompt=analysis_prompt,
        model_name=gemini_model_name,
        temperature=0.7,
        max_output_tokens=8192
    )
    if result.get('error'):
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API 호좜 μ‹€νŒ¨ - {result['error']}")
        print(f"[Parent Chunk 생성] 디버그: result 객체 λ‚΄μš©: {result}")
        return None
    if not result.get('response'):
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: Gemini API 응닡이 λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
        print(f"[Parent Chunk 생성] 디버그: result 객체 λ‚΄μš©: {result}")
        return None
    analysis_result = result['response']
    print(f"[Parent Chunk 생성] Gemini API 응닡 μˆ˜μ‹  성곡: {len(analysis_result)}자")
    return analysis_result


def _request_ollama_analysis(model_name, analysis_prompt):
    """Send the analysis prompt to the local Ollama server.

    Returns the response text, or None on a non-200 status. Connection-level
    errors (requests.RequestException) are logged and re-raised so the caller
    can handle them uniformly.
    """
    print(f"[Parent Chunk 생성] Ollama API에 뢄석 μš”μ²­ 전솑 쀑... (λͺ¨λΈ: {model_name})")
    try:
        ollama_response = requests.post(
            f'{OLLAMA_BASE_URL}/api/chat',
            json={
                'model': model_name,
                'messages': [
                    {
                        'role': 'user',
                        'content': analysis_prompt
                    }
                ],
                'stream': False
            },
            timeout=300  # 5-minute timeout: long documents take a while
        )
        if ollama_response.status_code != 200:
            error_detail = ollama_response.text if ollama_response.text else '상세 정보 μ—†μŒ'
            if ollama_response.status_code == 404:
                error_msg = f'Ollama API 였λ₯˜ 404: λͺ¨λΈ "{model_name}"을(λ₯Ό) 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. λͺ¨λΈμ΄ Ollama에 μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.'
                print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
                print(f"[Parent Chunk 생성] 디버그: λ§Œμ•½ Gemini λͺ¨λΈμ„ μ‚¬μš©ν•˜λ €λ©΄ λͺ¨λΈλͺ…이 'gemini:' λ˜λŠ” 'gemini-'둜 μ‹œμž‘ν•΄μ•Ό ν•©λ‹ˆλ‹€.")
            else:
                error_msg = f'Ollama API 였λ₯˜: {ollama_response.status_code} - {error_detail[:200]}'
                print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
            return None
        response_data = ollama_response.json()
        analysis_result = response_data.get('message', {}).get('content', '')
        print(f"[Parent Chunk 생성] Ollama API 응닡 μˆ˜μ‹  성곡: {len(analysis_result)}자")
        return analysis_result
    except requests.exceptions.RequestException as e:
        print(f"[Parent Chunk 생성] ❌ Ollama API μ—°κ²° 였λ₯˜: {str(e)}")
        print(f"[Parent Chunk 생성] 디버그: Ollama URL: {OLLAMA_BASE_URL}")
        raise


def _parse_analysis_sections(analysis_result):
    """Split the model's markdown-style answer into the five ParentChunk sections.

    Matching is substring-based against several header variants per section,
    in declaration order. Lines starting with '#' and blank lines are not
    collected as content. Returns a dict keyed by section name; unmatched
    sections map to ''.
    """
    section_headers = {
        'world_view': ['## 세계관 μ„€λͺ…', '## 세계관', '세계관 μ„€λͺ…'],
        'characters': ['## μ£Όμš” 캐릭터 뢄석', '## μ£Όμš” 캐릭터', 'μ£Όμš” 캐릭터 뢄석', '## 캐릭터'],
        'story': ['## μ£Όμš” μŠ€ν† λ¦¬ 뢄석', '## μ£Όμš” μŠ€ν† λ¦¬', 'μ£Όμš” μŠ€ν† λ¦¬ 뢄석', '## μŠ€ν† λ¦¬'],
        'episodes': ['## μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석', '## μ£Όμš” μ—ν”Όμ†Œλ“œ', 'μ£Όμš” μ—ν”Όμ†Œλ“œ 뢄석', '## μ—ν”Όμ†Œλ“œ'],
        'others': ['## 기타', '기타'],
    }
    parsed = {key: '' for key in section_headers}
    current_section = None
    current_content = []

    def _flush():
        # Persist whatever was collected for the section currently open.
        if current_section:
            parsed[current_section] = '\n'.join(current_content).strip()

    for line in analysis_result.split('\n'):
        line_stripped = line.strip()
        matched_key = None
        for key, headers in section_headers.items():
            if any(header in line_stripped for header in headers):
                matched_key = key
                break
        if matched_key:
            _flush()
            current_section = matched_key
            current_content = []
        elif current_section and line_stripped and not line_stripped.startswith('#'):
            # Keep the original line (with indentation), not the stripped copy.
            current_content.append(line)
    _flush()
    return parsed


def create_parent_chunk_with_ai(file_id, content, model_name):
    """Analyze a web-novel's text with an AI model and store a ParentChunk.

    Routes the request to Gemini (model names starting with "gemini:" or
    "gemini-") or to the local Ollama server, parses the answer into the
    five ParentChunk sections, replaces any existing ParentChunk for the
    file, and commits the new row.

    Args:
        file_id: id of the UploadedFile being analyzed.
        content: full text of the file (truncated to 50,000 chars for the prompt).
        model_name: model identifier; must be non-empty.

    Returns:
        The saved ParentChunk, or None on any failure (details are logged).
    """
    try:
        print(f"[Parent Chunk 생성] 파일 ID {file_id}에 λŒ€ν•œ Parent Chunk 생성 μ‹œμž‘")
        print(f"[Parent Chunk 생성] μ‚¬μš© λͺ¨λΈ: {model_name}")
        print(f"[Parent Chunk 생성] 원본 ν…μŠ€νŠΈ 길이: {len(content)}자")
        if not model_name or not model_name.strip():
            print(f"[Parent Chunk 생성] ❌ 였λ₯˜: λͺ¨λΈλͺ…이 μ œκ³΅λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
            return None
        # Cap the prompt input at 50,000 characters to stay within model limits.
        content_preview = content[:50000] if len(content) > 50000 else content
        if len(content) > 50000:
            print(f"[Parent Chunk 생성] ν…μŠ€νŠΈκ°€ κΈΈμ–΄ μΌλΆ€λ§Œ μ‚¬μš©: {len(content_preview)}자 (전체: {len(content)}자)")
        analysis_prompt = _build_analysis_prompt(content_preview)
        # Dispatch to the right backend based on the model-name prefix.
        model_name_lower = model_name.lower().strip()
        is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
        print(f"[Parent Chunk 생성] λͺ¨λΈ νƒ€μž… 확인: is_gemini={is_gemini}, model_name={model_name}")
        if is_gemini:
            analysis_result = _request_gemini_analysis(model_name, analysis_prompt)
        else:
            analysis_result = _request_ollama_analysis(model_name, analysis_prompt)
        if not analysis_result:
            print(f"[Parent Chunk 생성] ⚠️ κ²½κ³ : 뢄석 κ²°κ³Όκ°€ λΉ„μ–΄μžˆμŠ΅λ‹ˆλ‹€.")
            return None
        print(f"[Parent Chunk 생성] 뢄석 κ²°κ³Ό μˆ˜μ‹  μ™„λ£Œ: {len(analysis_result)}자")
        parsed = _parse_analysis_sections(analysis_result)
        world_view = parsed['world_view']
        characters = parsed['characters']
        story = parsed['story']
        episodes = parsed['episodes']
        others = parsed['others']
        # If no named section was recognized, stash the whole answer under "기타".
        if not world_view and not characters and not story and not episodes:
            print(f"[Parent Chunk 생성] κ²½κ³ : μ„Ήμ…˜ νŒŒμ‹± μ‹€νŒ¨. 전체 λ‚΄μš©μ„ '기타'에 μ €μž₯ν•©λ‹ˆλ‹€.")
            others = analysis_result.strip()
        # Replace any existing ParentChunk for this file.
        existing_parent = ParentChunk.query.filter_by(file_id=file_id).first()
        if existing_parent:
            db.session.delete(existing_parent)
            db.session.commit()
            print(f"[Parent Chunk 생성] κΈ°μ‘΄ Parent Chunk μ‚­μ œ μ™„λ£Œ")
        parent_chunk = ParentChunk(
            file_id=file_id,
            world_view=world_view if world_view else None,
            characters=characters if characters else None,
            story=story if story else None,
            episodes=episodes if episodes else None,
            others=others if others else None
        )
        db.session.add(parent_chunk)
        db.session.commit()
        print(f"[Parent Chunk 생성] βœ… μ™„λ£Œ: Parent Chunkκ°€ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        print(f"[Parent Chunk 생성] - 세계관: {len(world_view)}자")
        print(f"[Parent Chunk 생성] - 캐릭터: {len(characters)}자")
        print(f"[Parent Chunk 생성] - μŠ€ν† λ¦¬: {len(story)}자")
        print(f"[Parent Chunk 생성] - μ—ν”Όμ†Œλ“œ: {len(episodes)}자")
        print(f"[Parent Chunk 생성] - 기타: {len(others)}자")
        return parent_chunk
    except requests.exceptions.RequestException as e:
        # Connection failures re-raised by _request_ollama_analysis land here.
        error_msg = f'Ollama API μ—°κ²° 였λ₯˜: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
    except Exception as e:
        db.session.rollback()
        error_msg = f'Parent Chunk 생성 쀑 였λ₯˜: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 였λ₯˜: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
def get_parent_chunks_for_files(file_ids):
    """Return the ParentChunk row for each file id (context-building helper).

    Files without a ParentChunk are skipped; any lookup failure degrades
    to an empty list after logging.
    """
    try:
        if not file_ids:
            return []
        lookups = (ParentChunk.query.filter_by(file_id=fid).first() for fid in file_ids)
        return [pc for pc in lookups if pc]
    except Exception as e:
        print(f"[Parent Chunk 쑰회] 였λ₯˜: {str(e)}")
        return []
def search_relevant_chunks(query, file_ids=None, model_name=None, top_k=5, min_score=1):
    """
    Find chunks relevant to the question (vector search + re-ranking).

    Pipeline:
      1. Vector search retrieves an initial 30 candidate chunks.
      2. A Cross-Encoder re-ranks the candidates against the query.
      3. The top ``top_k`` chunks are returned (default 5).

    On empty vector results or any error, falls back to the keyword-based
    ``search_relevant_chunks_fallback`` (where ``min_score`` applies).
    """
    try:
        vector_db = get_vector_db()
        # Expand file ids so continuation uploads (child files) of any
        # selected file are searched too.
        expanded_file_ids = None
        if file_ids:
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            # NOTE(review): this second pass re-adds children of root files
            # already covered above, so expanded_file_ids may hold duplicates
            # — harmless for the IN() filter, but worth deduplicating.
            parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all()
            for parent_file in parent_files:
                child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all()
                expanded_file_ids.extend([child.id for child in child_files])
        # Optionally restrict the candidate files to a specific model name.
        if model_name and expanded_file_ids:
            filtered_files = UploadedFile.query.filter(
                UploadedFile.id.in_(expanded_file_ids),
                UploadedFile.model_name == model_name
            ).all()
            expanded_file_ids = [f.id for f in filtered_files]
        elif model_name and not expanded_file_ids:
            # No explicit file selection: filter by model name alone.
            filtered_files = UploadedFile.query.filter_by(model_name=model_name).all()
            expanded_file_ids = [f.id for f in filtered_files]
        # Step 1: vector search for the initial 30 candidates.
        print(f"[벑터 검색] 쿼리: {query[:50]}..., 파일 ID: {expanded_file_ids if expanded_file_ids else 'λͺ¨λ“  파일'}")
        vector_results = vector_db.search_chunks(
            query=query,
            file_ids=expanded_file_ids,
            top_k=30
        )
        if not vector_results:
            print(f"[벑터 검색] κ²°κ³Ό μ—†μŒ, ν‚€μ›Œλ“œ 기반 κ²€μƒ‰μœΌλ‘œ λŒ€μ²΄")
            return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
        # Step 2: Cross-Encoder re-ranking of the candidates.
        print(f"[λ¦¬λž­ν‚Ή] {len(vector_results)}개 청크에 λŒ€ν•œ λ¦¬λž­ν‚Ή μ‹œμž‘...")
        reranked_chunks = vector_db.rerank_chunks(
            query=query,
            chunks=vector_results,
            top_k=top_k
        )
        # Step 3: resolve the ranked chunk ids back to ORM objects.
        final_chunks = []
        for reranked in reranked_chunks:
            chunk_id = reranked['chunk_id']
            chunk = DocumentChunk.query.get(chunk_id)
            if chunk:
                final_chunks.append(chunk)
        print(f"[벑터 검색 + λ¦¬λž­ν‚Ή] μ΅œμ’… {len(final_chunks)}개 청크 λ°˜ν™˜")
        return final_chunks
    except Exception as e:
        print(f"[벑터 검색] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        # Any failure in the vector pipeline degrades to keyword search.
        print(f"[벑터 검색] ν‚€μ›Œλ“œ 기반 κ²€μƒ‰μœΌλ‘œ λŒ€μ²΄")
        return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
def search_relevant_chunks_fallback(query, file_ids=None, model_name=None, top_k=25, min_score=1):
    """Keyword-overlap chunk search, used when vector search is unavailable.

    Scores each chunk by word overlap with the query (base overlap count,
    raw term frequency, and overlap ratio) and returns the ``top_k``
    highest-scoring chunks with score >= ``min_score``. Returns [] on error.
    """
    try:
        # Tokenize the query into Korean and word-character tokens.
        query_words = set(re.findall(r'[κ°€-힣]+|\w+', query.lower()))
        if not query_words:
            return []
        query_obj = DocumentChunk.query.join(UploadedFile)
        if file_ids:
            # Expand file ids so continuation uploads (child files) of any
            # selected file are searched too (mirrors search_relevant_chunks).
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            # NOTE(review): may duplicate ids already added above — harmless
            # for the IN() filter.
            parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all()
            for parent_file in parent_files:
                child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            query_obj = query_obj.filter(UploadedFile.id.in_(expanded_file_ids))
        if model_name:
            query_obj = query_obj.filter(UploadedFile.model_name == model_name)
        all_chunks = query_obj.all()
        if not all_chunks:
            return []
        # Score each chunk for relevance.
        scored_chunks = []
        for chunk in all_chunks:
            chunk_content_lower = chunk.content.lower()
            chunk_words = set(re.findall(r'[κ°€-힣]+|\w+', chunk_content_lower))
            # 1. Base score: number of distinct shared words.
            common_words = query_words & chunk_words
            base_score = len(common_words)
            # 2. Frequency score: total occurrences of query words in the chunk.
            frequency_score = 0
            for word in query_words:
                frequency_score += chunk_content_lower.count(word)
            # 3. Ratio score: share of the chunk's vocabulary covered by the query.
            if len(chunk_words) > 0:
                ratio_score = len(common_words) / len(chunk_words) * 10
            else:
                ratio_score = 0
            # Weighted combination of the three signals.
            final_score = base_score * 2 + frequency_score * 0.5 + ratio_score
            # Keep only chunks meeting the minimum relevance threshold.
            if final_score >= min_score:
                scored_chunks.append((final_score, chunk))
        # Sort by score descending and take the best top_k.
        scored_chunks.sort(key=lambda x: x[0], reverse=True)
        top_chunks = [chunk for score, chunk in scored_chunks[:top_k]]
        return top_chunks
    except Exception as e:
        print(f"[ν‚€μ›Œλ“œ 검색] 였λ₯˜: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
@main_bp.route('/login', methods=['GET', 'POST'])
def login():
    """Login page: render the form on GET, authenticate on POST.

    Already-authenticated users are sent straight to their landing page
    (admin dashboard for admins, index otherwise). A successful login
    honors an optional ``?next=`` redirect target.
    """
    if current_user.is_authenticated:
        landing = 'main.admin' if current_user.is_admin else 'main.index'
        return redirect(url_for(landing))
    if request.method == 'POST':
        username = request.form.get('username', '').strip()
        password = request.form.get('password', '')
        if not username or not password:
            flash('μ‚¬μš©μžλͺ…κ³Ό λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.', 'error')
            return render_template('login.html')
        user = User.query.filter_by(username=username).first()
        if not (user and user.check_password(password) and user.is_active):
            # Deliberately vague message: do not reveal which part failed.
            flash('μ‚¬μš©μžλͺ… λ˜λŠ” λΉ„λ°€λ²ˆν˜Έκ°€ μ˜¬λ°”λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.', 'error')
            return render_template('login.html')
        login_user(user)
        user.last_login = datetime.utcnow()
        db.session.commit()
        next_page = request.args.get('next')
        if next_page:
            return redirect(next_page)
        return redirect(url_for('main.admin' if user.is_admin else 'main.index'))
    return render_template('login.html')
@main_bp.route('/logout')
@login_required
def logout():
    """Log the current user out and redirect to the login page."""
    logout_user()
    flash('λ‘œκ·Έμ•„μ›ƒλ˜μ—ˆμŠ΅λ‹ˆλ‹€.', 'info')
    return redirect(url_for('main.login'))
@main_bp.route('/')
@login_required
def index():
    """Main chat page (requires login)."""
    return render_template('index.html')
@main_bp.route('/admin')
@admin_required
def admin():
    """Admin dashboard: lists all users, newest first."""
    users = User.query.order_by(User.created_at.desc()).all()
    return render_template('admin.html', users=users)
@main_bp.route('/admin/messages')
@admin_required
def admin_messages():
    """Admin page for browsing chat messages (data loaded via the messages API)."""
    return render_template('admin_messages.html')
@main_bp.route('/admin/webnovels')
@admin_required
def admin_webnovels():
    """Admin page for managing uploaded web novels."""
    return render_template('admin_webnovels.html')
@main_bp.route('/api/admin/users', methods=['GET'])
@admin_required
def get_users():
    """Admin API: list every user account, newest first."""
    try:
        all_users = User.query.order_by(User.created_at.desc()).all()
        payload = {'users': [account.to_dict() for account in all_users]}
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'μ‚¬μš©μž λͺ©λ‘ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users', methods=['POST'])
@admin_required
def create_user():
    """Admin API: create a new user account.

    Expects JSON with ``username`` and ``password`` (required), plus
    optional ``nickname`` and ``is_admin`` (default False). Returns the
    created user as JSON, 400 on invalid input, 500 on unexpected errors.
    """
    try:
        # Validate the payload shape up front (consistent with
        # set_gemini_api_key) so malformed requests yield a clean 400
        # instead of an AttributeError-driven 500.
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'μš”μ²­ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # `or ''` guards against explicit JSON nulls for the string fields.
        username = (data.get('username') or '').strip()
        nickname = (data.get('nickname') or '').strip()
        password = data.get('password') or ''
        is_admin = data.get('is_admin', False)
        if not username or not password:
            return jsonify({'error': 'μ‚¬μš©μžλͺ…κ³Ό λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        if User.query.filter_by(username=username).first():
            return jsonify({'error': '이미 μ‘΄μž¬ν•˜λŠ” μ‚¬μš©μžλͺ…μž…λ‹ˆλ‹€.'}), 400
        user = User(username=username, nickname=nickname if nickname else None, is_admin=is_admin, is_active=True)
        user.set_password(password)
        db.session.add(user)
        db.session.commit()
        return jsonify({
            'message': 'μ‚¬μš©μžκ°€ μ„±κ³΅μ μœΌλ‘œ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users/<int:user_id>', methods=['PUT'])
@admin_required
def update_user(user_id):
    """Admin API: partially update a user's profile, password, or flags.

    Accepts a JSON body with any of ``username``, ``nickname``,
    ``password``, ``is_admin``, ``is_active``. Refuses to strip the
    caller's own admin flag. Returns the updated user as JSON, 400 on
    invalid input, 404 for an unknown user id.
    """
    try:
        user = User.query.get_or_404(user_id)
        # Validate the payload shape up front (consistent with
        # set_gemini_api_key) so malformed requests yield a clean 400
        # instead of a 500.
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'μš”μ²­ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # Block self-demotion. `== False` (not `is False`) also catches a
        # JSON 0, matching the original behavior.
        if user_id == current_user.id and data.get('is_admin') == False:
            return jsonify({'error': '자기 μžμ‹ μ˜ κ΄€λ¦¬μž κΆŒν•œμ„ μ œκ±°ν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        if 'username' in data:
            # `or ''` guards against an explicit JSON null.
            new_username = (data['username'] or '').strip()
            if new_username != user.username:
                if User.query.filter_by(username=new_username).first():
                    return jsonify({'error': '이미 μ‘΄μž¬ν•˜λŠ” μ‚¬μš©μžλͺ…μž…λ‹ˆλ‹€.'}), 400
                user.username = new_username
        if 'nickname' in data:
            user.nickname = data['nickname'].strip() if data['nickname'] else None
        if 'password' in data and data['password']:
            user.set_password(data['password'])
        if 'is_admin' in data:
            user.is_admin = data['is_admin']
        if 'is_active' in data:
            user.is_active = data['is_active']
        db.session.commit()
        return jsonify({
            'message': 'μ‚¬μš©μž 정보가 μ„±κ³΅μ μœΌλ‘œ μˆ˜μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž 정보 μˆ˜μ • 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/messages', methods=['GET'])
@admin_required
def get_all_messages():
    """Admin API: paginated chat messages, optionally filtered by user or session."""
    try:
        user_id = request.args.get('user_id', type=int)
        session_id = request.args.get('session_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        msg_query = ChatMessage.query.join(ChatSession)
        if user_id:
            msg_query = msg_query.filter(ChatSession.user_id == user_id)
        if session_id:
            msg_query = msg_query.filter(ChatMessage.session_id == session_id)
        page_obj = msg_query.order_by(ChatMessage.created_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False)
        return jsonify({
            'messages': [m.to_dict() for m in page_obj.items],
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λ©”μ‹œμ§€ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/sessions', methods=['GET'])
@admin_required
def get_all_sessions():
    """Admin API: paginated chat sessions (optionally per-user), with owner info."""
    try:
        user_id = request.args.get('user_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        session_query = ChatSession.query
        if user_id:
            session_query = session_query.filter(ChatSession.user_id == user_id)
        page_obj = session_query.order_by(ChatSession.updated_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False)
        sessions_data = []
        for chat_session in page_obj.items:
            entry = chat_session.to_dict()
            # Attach the owner's identity for the admin UI; tolerate orphans.
            owner = chat_session.user
            entry['username'] = owner.username if owner else 'Unknown'
            entry['nickname'] = owner.nickname if owner else None
            sessions_data.append(entry)
        return jsonify({
            'sessions': sessions_data,
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/users/<int:user_id>', methods=['DELETE'])
@admin_required
def delete_user(user_id):
    """Admin API: delete a user account; self-deletion is refused."""
    try:
        target = User.query.get_or_404(user_id)
        # An admin must never be able to remove their own account.
        if user_id == current_user.id:
            return jsonify({'error': '자기 μžμ‹ μ„ μ‚­μ œν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        db.session.delete(target)
        db.session.commit()
        return jsonify({'message': 'μ‚¬μš©μžκ°€ μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ‚¬μš©μž μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/gemini-api-key', methods=['GET'])
@admin_required
def get_gemini_api_key():
    """Admin API: report whether a Gemini API key is stored (masked preview only)."""
    try:
        # SystemConfig returns '' when the key (or the table) is absent.
        stored_key = SystemConfig.get_config('gemini_api_key', '')
        # Never return the full key; expose at most the first 8 characters.
        if stored_key and len(stored_key) > 8:
            masked = stored_key[:8] + '...'
        else:
            masked = ''
        return jsonify({
            'has_api_key': bool(stored_key),
            'masked_key': masked
        }), 200
    except Exception as e:
        print(f"[Gemini API ν‚€ 쑰회] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν‚€ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/admin/gemini-api-key', methods=['POST'])
@admin_required
def set_gemini_api_key():
    """Admin API: store/update the Gemini API key and reload the client."""
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Type이 application/json이 μ•„λ‹™λ‹ˆλ‹€.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'μš”μ²­ 데이터가 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        api_key = data.get('api_key', '').strip()
        if not api_key:
            return jsonify({'error': 'API ν‚€λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.'}), 400
        # Persist the key; SystemConfig.set_config creates its table if needed.
        SystemConfig.set_config(
            key='gemini_api_key',
            value=api_key,
            description='Google Gemini API ν‚€'
        )
        # Best-effort: force the cached Gemini client to pick up the new key.
        try:
            from app.gemini_client import reset_gemini_client
            reset_gemini_client()
            print(f"[Gemini] API ν‚€κ°€ μ—…λ°μ΄νŠΈλ˜μ–΄ ν΄λΌμ΄μ–ΈνŠΈκ°€ μž¬λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            print(f"[Gemini] API ν‚€ μž¬λ‘œλ“œ μ‹€νŒ¨: {e}")
        return jsonify({
            'message': 'Gemini API ν‚€κ°€ μ„±κ³΅μ μœΌλ‘œ μ €μž₯λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'has_api_key': True
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[Gemini API ν‚€ μ €μž₯] 였λ₯˜: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν‚€ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/ollama/models', methods=['GET'])
@login_required
def get_ollama_models():
    """List the models available from Ollama and (when configured) Gemini."""
    try:
        all_models = []
        # Ollama models — a short timeout keeps this endpoint responsive
        # even when the Ollama daemon is down.
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                tags = response.json()
                ollama_models = [{'name': entry['name'], 'type': 'ollama'} for entry in tags.get('models', [])]
                all_models.extend(ollama_models)
                print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models)}개 μΆ”κ°€")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        # Gemini models, namespaced with a "gemini:" prefix so the chat
        # endpoint can route requests to the right backend.
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                gemini_model_list = [{'name': f'gemini:{name}', 'type': 'gemini'} for name in gemini_models]
                all_models.extend(gemini_model_list)
                print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_model_list)}개 μΆ”κ°€")
            else:
                print(f"[λͺ¨λΈ λͺ©λ‘] Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•„ Gemini λͺ¨λΈμ„ 뢈러올 수 μ—†μŠ΅λ‹ˆλ‹€.")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ 쑰회 μ‹€νŒ¨: {e}")
        if not all_models:
            return jsonify({'error': 'μ‚¬μš© κ°€λŠ₯ν•œ λͺ¨λΈμ΄ μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지, λ˜λŠ” Gemini API ν‚€κ°€ μ„€μ •λ˜μ—ˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”.', 'models': []}), 500
        return jsonify({'models': all_models})
    except Exception as e:
        return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ„ κ°€μ Έμ˜€λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}', 'models': []}), 500
@main_bp.route('/api/chat', methods=['POST'])
@login_required
def chat():
    """Chat API endpoint.

    Request JSON: ``message`` (required), ``model`` (optional; names with a
    ``gemini:`` prefix are routed to the Gemini API, everything else to the
    local Ollama server), ``file_ids`` (optional web-novel files used for
    RAG grounding) and ``session_id`` (optional chat session to append the
    exchange to). Without a model, a static guidance message is returned.
    """
    try:
        data = request.json
        message = data.get('message', '')
        model = data.get('model', '')
        file_ids = [int(fid) for fid in data.get('file_ids', []) if fid]  # selected web-novel file IDs
        session_id = data.get('session_id', None)  # chat session ID (int)
        if not message:
            return jsonify({'error': 'λ©”μ‹œμ§€κ°€ ν•„μš”ν•©λ‹ˆλ‹€.'}), 400
        # A model was selected: build a RAG context and call the backend.
        if model:
            try:
                # RAG: retrieve chunks relevant to the question.
                context = ""
                use_rag = True  # cleared below when retrieval finds nothing
                if use_rag:
                    print(f"\n[RAG 검색] λͺ¨λΈ: {model}, 질문: {message[:50]}...")
                    print(f"[RAG 검색] μ„ νƒλœ 파일 ID: {file_ids if file_ids else 'μ—†μŒ (λͺ¨λ“  파일 검색)'}")
                    # Stage 1: parent chunks capture the big-picture context.
                    parent_chunks = []
                    if file_ids:
                        print(f"[RAG 검색 1단계] Parent Chunk 쑰회 μ‹œμž‘...")
                        parent_chunks = get_parent_chunks_for_files(file_ids)
                        print(f"[RAG 검색 1단계] Parent Chunk 쑰회 μ™„λ£Œ: {len(parent_chunks)}개 파일")
                    # Stage 2: vector search + reranking for precise child chunks.
                    print(f"[RAG 검색 2단계] 벑터 검색 + λ¦¬λž­ν‚Ή μ‹œμž‘...")
                    relevant_chunks = search_relevant_chunks(
                        query=message,
                        file_ids=file_ids if file_ids else None,
                        model_name=model,
                        top_k=5,  # keep only the top 5 chunks after reranking
                        min_score=0.5  # minimum relevance-score threshold
                    )
                    print(f"[RAG 검색 2단계] 벑터 검색 + λ¦¬λž­ν‚Ή μ™„λ£Œ: {len(relevant_chunks)}개 청크 (μƒμœ„ 5개)")
                    # Assemble the prompt context from the retrieved chunks.
                    context_parts = []
                    # Parent-chunk overview sections (for overall context).
                    if parent_chunks:
                        parent_context_sections = []
                        for parent_chunk in parent_chunks:
                            file = parent_chunk.file
                            file_info = f"\n=== {file.original_filename} 전체 κ°œμš” ===\n"
                            sections = []
                            if parent_chunk.world_view:
                                sections.append(f"[세계관]\n{parent_chunk.world_view}")
                            if parent_chunk.characters:
                                sections.append(f"[μ£Όμš” 캐릭터]\n{parent_chunk.characters}")
                            if parent_chunk.story:
                                sections.append(f"[μ£Όμš” μŠ€ν† λ¦¬]\n{parent_chunk.story}")
                            if parent_chunk.episodes:
                                sections.append(f"[μ£Όμš” μ—ν”Όμ†Œλ“œ]\n{parent_chunk.episodes}")
                            if parent_chunk.others:
                                sections.append(f"[기타 정보]\n{parent_chunk.others}")
                            if sections:
                                parent_context_sections.append(file_info + "\n\n".join(sections))
                        if parent_context_sections:
                            parent_context = "\n\n".join(parent_context_sections)
                            context_parts.append(f"λ‹€μŒμ€ μ›Ήμ†Œμ„€μ˜ 전체적인 λ¬Έλ§₯κ³Ό κ°œμš”μž…λ‹ˆλ‹€:\n\n{parent_context}")
                            print(f"[RAG 검색] Parent Chunk μ»¨ν…μŠ€νŠΈ μΆ”κ°€: {len(parent_context)}자")
                    # Child-chunk details (precise retrieval results).
                    if relevant_chunks:
                        child_context_parts = []
                        seen_files = set()
                        for chunk in relevant_chunks:
                            file = chunk.file
                            # Log each distinct source file only once.
                            if file.original_filename not in seen_files:
                                seen_files.add(file.original_filename)
                                print(f"[RAG 검색] μ‚¬μš©λœ 파일: {file.original_filename} (λͺ¨λΈ: {file.model_name})")
                            child_context_parts.append(f"[{file.original_filename} - 청크 {chunk.chunk_index + 1}]\n{chunk.content}")
                        if child_context_parts:
                            # Cap the child-chunk context at 15000 characters,
                            # dropping whole chunks past the limit.
                            full_child_context = "\n\n".join(child_context_parts)
                            child_context_length = len(full_child_context)
                            if child_context_length > 15000:
                                truncated_parts = []
                                current_length = 0
                                for part in child_context_parts:
                                    if current_length + len(part) > 15000:
                                        break
                                    truncated_parts.append(part)
                                    current_length += len(part)
                                full_child_context = "\n\n".join(truncated_parts)
                                print(f"[RAG 검색] Child Chunk μ»¨ν…μŠ€νŠΈ 길이 쑰절: {child_context_length}자 β†’ {len(full_child_context)}자")
                            context_parts.append(f"λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ μ›Ήμ†Œμ„€μ˜ ꡬ체적인 λ‚΄μš©μž…λ‹ˆλ‹€ (μ •λ°€ 검색 κ²°κ³Ό, 총 {len(relevant_chunks)}개 청크):\n\n{full_child_context}")
                            print(f"[RAG 검색] Child Chunk μ»¨ν…μŠ€νŠΈ μΆ”κ°€: {len(full_child_context)}자")
                    # Final prompt assembly: the wording depends on which
                    # chunk kinds were retrieved.
                    if context_parts:
                        full_context = "\n\n" + "\n\n---\n\n".join(context_parts) + "\n\n"
                        # Both parent and child chunks available.
                        if parent_chunks and relevant_chunks:
                            context = f"""λ‹€μŒμ€ μ§ˆλ¬Έμ— λ‹΅ν•˜κΈ° μœ„ν•œ μ›Ήμ†Œμ„€ μ •λ³΄μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”:
- λ¨Όμ € 전체적인 λ¬Έλ§₯(Parent Chunk)을 μ΄ν•΄ν•˜μ—¬ μ›Ήμ†Œμ„€μ˜ λ°°κ²½κ³Ό 섀정을 νŒŒμ•…ν•˜μ„Έμš”.
- κ·Έ λ‹€μŒ ꡬ체적인 λ‚΄μš©(Child Chunk)을 톡해 μ§ˆλ¬Έμ— λŒ€ν•œ μ •ν™•ν•œ 닡변을 μ œκ³΅ν•˜μ„Έμš”.
- μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ 일관성 μžˆλŠ” 닡변을 μž‘μ„±ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
                        elif parent_chunks:
                            # Parent chunks only.
                            context = f"""λ‹€μŒμ€ μ›Ήμ†Œμ„€μ˜ 전체적인 λ¬Έλ§₯κ³Ό κ°œμš”μž…λ‹ˆλ‹€:
{full_context}
μœ„ 정보λ₯Ό μ°Έκ³ ν•˜μ—¬ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”. μ›Ήμ†Œμ„€μ˜ λ°°κ²½κ³Ό 섀정을 κ³ λ €ν•˜μ—¬ λ‹΅λ³€ν•˜μ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
                        else:
                            # Child chunks only.
                            context = f"""λ‹€μŒμ€ 질문과 κ΄€λ ¨λœ μ›Ήμ†Œμ„€μ˜ ꡬ체적인 λ‚΄μš©μž…λ‹ˆλ‹€:
{full_context}
μœ„ λ‚΄μš©μ„ μΆ©λΆ„νžˆ μ°Έκ³ ν•˜μ—¬ λ‹€μŒ μ§ˆλ¬Έμ— μ •ν™•ν•˜κ³  μƒμ„Έν•˜κ²Œ λ‹΅λ³€ν•΄μ£Όμ„Έμš”. μ›Ήμ†Œμ„€μ˜ λ§₯락과 μŠ€ν† λ¦¬λ₯Ό κ³ λ €ν•˜μ—¬ λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
                        context += message
                        print(f"[RAG 검색] μ΅œμ’… μ»¨ν…μŠ€νŠΈ 생성 μ™„λ£Œ (Parent Chunk: {len(parent_chunks)}개, Child Chunk: {len(relevant_chunks)}개, 총 {len(context)}자)")
                    else:
                        # Nothing retrieved: fall back to whole-file context.
                        print(f"[RAG 검색] κ΄€λ ¨ 청크λ₯Ό μ°Ύμ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. 전체 파일 λ‚΄μš© μ‚¬μš©")
                        use_rag = False
                # Fallback path: read whole files when RAG produced no context.
                if not context and not use_rag:
                    if file_ids:
                        # Expand the selection with each file's continuation uploads.
                        expanded_file_ids = list(file_ids)
                        for file_id in file_ids:
                            # For a root file, include its continuation uploads too.
                            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                            expanded_file_ids.extend([child.id for child in child_files])
                        uploaded_files = UploadedFile.query.filter(
                            UploadedFile.id.in_(expanded_file_ids),
                            UploadedFile.model_name == model
                        ).all()
                        print(f"[파일 μ‚¬μš©] μ„ νƒλœ 파일 ID둜 쑰회 (μ΄μ–΄μ„œ μ—…λ‘œλ“œ 포함): {len(uploaded_files)}개 파일")
                    else:
                        # No selection: use every file tied to this model.
                        uploaded_files = UploadedFile.query.filter_by(model_name=model).all()
                        print(f"[파일 μ‚¬μš©] λͺ¨λΈ '{model}'의 λͺ¨λ“  파일 μ‚¬μš©: {len(uploaded_files)}개 파일")
                    if uploaded_files:
                        print(f"[파일 μ‚¬μš©] μ‚¬μš©λ˜λŠ” 파일 λͺ©λ‘:")
                        for f in uploaded_files:
                            is_child = f.parent_file_id is not None
                            prefix = " └─ " if is_child else " - "
                            print(f"{prefix}{f.original_filename} (λͺ¨λΈ: {f.model_name})")
                    context_parts = []
                    for file in uploaded_files:
                        try:
                            if os.path.exists(file.file_path):
                                # UTF-8 first, CP949 fallback for legacy files.
                                encoding = 'utf-8'
                                try:
                                    with open(file.file_path, 'r', encoding=encoding) as f:
                                        file_content = f.read()
                                except UnicodeDecodeError:
                                    with open(file.file_path, 'r', encoding='cp949') as f:
                                        file_content = f.read()
                                # Truncate very long files (cap raised to 20000 chars).
                                if len(file_content) > 20000:
                                    file_content = file_content[:20000] + "..."
                                context_parts.append(f"[{file.original_filename}]\n{file_content}")
                        except Exception as e:
                            print(f"파일 읽기 였λ₯˜ ({file.original_filename}): {str(e)}")
                            continue
                    if context_parts:
                        context = "\n\n".join(context_parts)
                        context = f"""λ‹€μŒμ€ ν•™μŠ΅λœ μ›Ήμ†Œμ„€ λ‚΄μš©μž…λ‹ˆλ‹€:
{context}
μœ„ λ‚΄μš©μ„ μ°Έκ³ ν•˜μ—¬ λ‹€μŒ μ§ˆλ¬Έμ— λ‹΅λ³€ν•΄μ£Όμ„Έμš”.
μ€‘μš”: μ§ˆλ¬Έμ— λ‹΅λ³€ν•  λ•ŒλŠ” λ°˜λ“œμ‹œ 제곡된 [μ†Œμ„€ λ³Έλ¬Έ] λ‚΄μ˜ λ‚΄μš©μ„ 근거둜 ν•΄μ•Ό ν•©λ‹ˆλ‹€.
λ‹΅λ³€μ˜ 각 λ¬Έμž₯ λμ—λŠ” μ°Έκ³ ν•œ 본문의 λ¬Έμž₯을 [κ·Όκ±°: "λ¬Έμž₯ λ‚΄μš©..."] ν˜•μ‹μœΌλ‘œ λ°˜λ“œμ‹œ λΆ™μ΄μ„Έμš”.
κ·Όκ±°λ₯Ό 찾을 수 μ—†λ‹€λ©΄ "λ‚΄μš©μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"라고 λ‹΅ν•˜κ³  μ§€μ–΄λ‚΄μ§€ λ§ˆμ„Έμš”.
질문:
"""
                # Build the final prompt and dispatch to the right backend.
                full_prompt = context + message if context else message
                # Backend selection: a 'gemini:' prefix routes to Gemini.
                is_gemini = model.startswith('gemini:')
                if is_gemini:
                    # Gemini API call.
                    gemini_model_name = model.replace('gemini:', '')
                    print(f"[Gemini] λͺ¨λΈ: {gemini_model_name}, 질문: {message[:50]}...")
                    gemini_client = get_gemini_client()
                    if not gemini_client.is_configured():
                        return jsonify({'error': 'Gemini API ν‚€κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. GEMINI_API_KEY ν™˜κ²½ λ³€μˆ˜λ₯Ό μ„€μ •ν•˜μ„Έμš”.'}), 500
                    result = gemini_client.generate_response(
                        prompt=full_prompt,
                        model_name=gemini_model_name,
                        temperature=0.7,
                        max_output_tokens=8192
                    )
                    if result['error']:
                        return jsonify({'error': result['error']}), 500
                    response_text = result['response']
                else:
                    # Ollama API call.
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model,
                            'prompt': full_prompt,
                            'stream': False
                        },
                        timeout=120  # large file contexts can be slow
                    )
                    if ollama_response.status_code != 200:
                        # Pull error details out of the response body if possible.
                        try:
                            error_detail = ollama_response.json().get('error', ollama_response.text[:200])
                        except:
                            error_detail = ollama_response.text[:200] if ollama_response.text else '상세 정보 μ—†μŒ'
                        if ollama_response.status_code == 404:
                            error_msg = f'λͺ¨λΈ "{model}"을(λ₯Ό) 찾을 수 μ—†μŠ΅λ‹ˆλ‹€. λͺ¨λΈμ΄ Ollama에 μ„€μΉ˜λ˜μ–΄ μžˆλŠ”μ§€ ν™•μΈν•˜μ„Έμš”. (였λ₯˜: {error_detail})'
                        else:
                            error_msg = f'Ollama μ„œλ²„ 였λ₯˜: {ollama_response.status_code} (였λ₯˜: {error_detail})'
                        return jsonify({'error': error_msg}), ollama_response.status_code
                    ollama_data = ollama_response.json()
                    response_text = ollama_data.get('response', '응닡을 생성할 수 μ—†μŠ΅λ‹ˆλ‹€.')
                # Persist the exchange into the chat session (shared by
                # Gemini and Ollama paths).
                session_id = data.get('session_id')
                session_dict = None
                if session_id:
                    try:
                        session = ChatSession.query.filter_by(
                            id=session_id,
                            user_id=current_user.id
                        ).first()
                        if session:
                            # Duplicate guard: check the most recent user message
                            # so the same text is not stored twice in a row.
                            latest_user_msg = ChatMessage.query.filter_by(
                                session_id=session_id,
                                role='user'
                            ).order_by(ChatMessage.created_at.desc()).first()
                            # Skip saving if an identical message was stored
                            # within the last 10 seconds.
                            should_save = True
                            if latest_user_msg:
                                time_diff = (datetime.utcnow() - latest_user_msg.created_at).total_seconds()
                                if latest_user_msg.content == message and time_diff < 10:
                                    should_save = False
                                    print(f"[쀑볡 λ°©μ§€] 졜근 {time_diff:.2f}초 전에 같은 λ©”μ‹œμ§€κ°€ μ €μž₯λ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€. μ €μž₯을 κ±΄λ„ˆλœλ‹ˆλ‹€.")
                            if should_save:
                                user_msg = ChatMessage(
                                    session_id=session_id,
                                    role='user',
                                    content=message
                                )
                                db.session.add(user_msg)
                                print(f"[λ©”μ‹œμ§€ μ €μž₯] μ‚¬μš©μž λ©”μ‹œμ§€ μ €μž₯: {message[:50]}...")
                                # Update the session title on the first real user message.
                                title_needs_update = (
                                    not session.title or
                                    session.title.strip() == '' or
                                    session.title == 'μƒˆ λŒ€ν™”'
                                )
                                if title_needs_update and message.strip():
                                    # Use the message itself as the title (max 30 chars).
                                    title = message.strip()[:30]
                                    if len(message.strip()) > 30:
                                        title += '...'
                                    session.title = title
                                    print(f"[μ„Έμ…˜ 제λͺ©] μ—…λ°μ΄νŠΈ: '{title}' (원본 길이: {len(message.strip())}자)")
                                elif title_needs_update:
                                    print(f"[μ„Έμ…˜ 제λͺ©] λ©”μ‹œμ§€κ°€ λΉ„μ–΄μžˆμ–΄ 제λͺ©μ„ μ—…λ°μ΄νŠΈν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
                            else:
                                print(f"[λ©”μ‹œμ§€ μ €μž₯] 쀑볡 λ©”μ‹œμ§€λ‘œ 인해 μ €μž₯을 κ±΄λ„ˆλœλ‹ˆλ‹€.")
                            # Save the AI response.
                            ai_msg = ChatMessage(
                                session_id=session_id,
                                role='ai',
                                content=response_text
                            )
                            db.session.add(ai_msg)
                            session.updated_at = datetime.utcnow()
                            db.session.commit()
                            # Include the (possibly retitled) session in the response.
                            session_dict = session.to_dict()
                    except Exception as e:
                        print(f"λ©”μ‹œμ§€ μ €μž₯ 였λ₯˜: {str(e)}")
                        db.session.rollback()
                        session_dict = None
                response_data = {'response': response_text, 'session_id': session_id}
                if session_dict:
                    response_data['session'] = session_dict
                return jsonify(response_data)
            except requests.exceptions.ConnectionError:
                return jsonify({'error': 'Ollama μ„œλ²„μ— μ—°κ²°ν•  수 μ—†μŠ΅λ‹ˆλ‹€. Ollamaκ°€ μ‹€ν–‰ 쀑인지 ν™•μΈν•˜μ„Έμš”.'}), 503
            except requests.exceptions.Timeout:
                return jsonify({'error': '응닡 μ‹œκ°„μ΄ μ΄ˆκ³Όλ˜μ—ˆμŠ΅λ‹ˆλ‹€. 더 짧은 λ©”μ‹œμ§€λ₯Ό μ‹œλ„ν•΄λ³΄μ„Έμš”.'}), 504
            except Exception as e:
                return jsonify({'error': f'Ollama 톡신 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
        else:
            # No model selected: return a static guidance message.
            response_text = f"μ•ˆλ…•ν•˜μ„Έμš”! '{message}'에 λŒ€ν•œ 닡변을 μ€€λΉ„ μ€‘μž…λ‹ˆλ‹€.\n\n쒌츑 ν•˜λ‹¨μ—μ„œ 둜컬 AI λͺ¨λΈμ„ μ„ νƒν•˜λ©΄ 더 μ •ν™•ν•œ 닡변을 μ œκ³΅ν•  수 μžˆμŠ΅λ‹ˆλ‹€."
            return jsonify({'response': response_text})
    except Exception as e:
        return jsonify({'error': f'μ±„νŒ… 처리 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/upload', methods=['POST'])
@login_required
def upload_file():
    """Upload a web-novel file and prepare it for RAG.

    Multipart form fields: ``file`` (required), ``model_name`` (required;
    the AI model the file is tied to) and ``parent_file_id`` (optional;
    marks this upload as a continuation of an existing file of the same
    model). For ``.txt``/``.md`` files the content is split into child
    chunks and an AI-analyzed parent chunk is created.

    Fix: the stored filename now interpolates the ``secure_filename``
    result (previously the sanitized name was computed but never used, so
    saved files lost their original name and extension).
    """
    import sys
    import traceback
    # Flush every log line immediately so upload progress is visible live.
    def log_print(*args, **kwargs):
        print(*args, **kwargs)
        sys.stdout.flush()
    try:
        log_print(f"\n{'='*60}")
        log_print(f"=== 파일 μ—…λ‘œλ“œ μš”μ²­ μ‹œμž‘ ===")
        log_print(f"μš”μ²­ λ©”μ„œλ“œ: {request.method}")
        log_print(f"Content-Type: {request.content_type}")
        log_print(f"Content-Length: {request.content_length}")
        log_print(f"Form 데이터 ν‚€: {list(request.form.keys())}")
        log_print(f"Files ν‚€: {list(request.files.keys())}")
        log_print(f"μ‚¬μš©μž: {current_user.username if current_user else 'None'}")
        log_print(f"{'='*60}\n")
        # Ensure the upload folder exists before touching the file.
        try:
            ensure_upload_folder()
            log_print(f"[1/8] μ—…λ‘œλ“œ 폴더 확인 μ™„λ£Œ: {UPLOAD_FOLDER}")
        except Exception as e:
            error_msg = f'μ—…λ‘œλ“œ 폴더λ₯Ό μ€€λΉ„ν•  수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'folder_check'}), 500
        if 'file' not in request.files:
            error_msg = '파일이 μ—†μŠ΅λ‹ˆλ‹€.'
            log_print(f"[ERROR] {error_msg}")
            log_print(f"μ‚¬μš© κ°€λŠ₯ν•œ ν‚€: {list(request.files.keys())}")
            return jsonify({'error': error_msg, 'step': 'file_check'}), 400
        file = request.files['file']
        model_name = request.form.get('model_name', '').strip()
        parent_file_id = request.form.get('parent_file_id', None)  # set only for continuation uploads
        log_print(f"[2/8] 파일 μˆ˜μ‹ : {file.filename if file else 'None'}")
        log_print(f"[2/8] λͺ¨λΈλͺ…: {model_name if model_name else 'None (λΉ„μ–΄μžˆμŒ)'}")
        log_print(f"[2/8] μ΄μ–΄μ„œ μ—…λ‘œλ“œ: {parent_file_id if parent_file_id else 'μ•„λ‹ˆμ˜€'}")
        if file.filename == '':
            error_msg = '파일λͺ…이 μ—†μŠ΅λ‹ˆλ‹€.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'filename_check'}), 400
        # A target model is mandatory.
        if not model_name:
            error_msg = 'AI λͺ¨λΈμ„ μ„ νƒν•΄μ£Όμ„Έμš”.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'model_check'}), 400
        # Validate parent_file_id for continuation uploads: the parent must
        # exist, belong to the current user and use the same model.
        parent_file = None
        if parent_file_id:
            try:
                parent_file_id = int(parent_file_id)
                parent_file = UploadedFile.query.filter_by(
                    id=parent_file_id,
                    uploaded_by=current_user.id
                ).first()
                if not parent_file:
                    error_msg = '원본 νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'parent_file_check'}), 404
                if parent_file.model_name != model_name:
                    error_msg = '같은 λͺ¨λΈμ˜ νŒŒμΌμ—λ§Œ μ΄μ–΄μ„œ μ—…λ‘œλ“œν•  수 μžˆμŠ΅λ‹ˆλ‹€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'model_mismatch'}), 400
                log_print(f"[μ΄μ–΄μ„œ μ—…λ‘œλ“œ] 원본 파일: {parent_file.original_filename} (ID: {parent_file_id})")
            except (ValueError, TypeError):
                # Fix: log the offending value BEFORE clearing it, so the log
                # shows what was actually rejected (previously it printed None).
                log_print(f"[κ²½κ³ ] 잘λͺ»λœ parent_file_id: {parent_file_id}")
                parent_file_id = None
        log_print(f"[3/8] μ—…λ‘œλ“œ μ‹œλ„: {file.filename}, λͺ¨λΈ: {model_name}")
        if not allowed_file(file.filename):
            error_msg = f'ν—ˆμš©λ˜μ§€ μ•Šμ€ 파일 ν˜•μ‹μž…λ‹ˆλ‹€. ν—ˆμš© ν˜•μ‹: {", ".join(ALLOWED_EXTENSIONS)}'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'file_type_check'}), 400
        log_print(f"[4/8] 파일 ν˜•μ‹ 확인 μ™„λ£Œ: {file.filename}")
        # Determine the file size up front when possible. 0 is the sentinel
        # for "unknown — verify after saving instead".
        file_size = 0
        try:
            if request.content_length:
                file_size = request.content_length
                print(f"Content-Length둜 파일 크기 확인: {file_size} bytes")
            else:
                # No Content-Length header: probe the stream by seeking.
                try:
                    current_pos = file.tell()
                    file.seek(0, os.SEEK_END)
                    file_size = file.tell()
                    file.seek(current_pos, os.SEEK_SET)
                    print(f"파일 슀트림으둜 크기 확인: {file_size} bytes")
                except (AttributeError, IOError, OSError) as e:
                    print(f"파일 크기 확인 μ‹€νŒ¨ (μ €μž₯ ν›„ 확인): {str(e)}")
                    file_size = 0  # verify after saving
        except Exception as e:
            print(f"파일 크기 확인 였λ₯˜: {str(e)}")
            file_size = 0  # verify after saving
        # Pre-save size check (only when the size is known).
        if file_size > 0:
            if file_size > 100 * 1024 * 1024:  # 100MB cap
                print(f"파일 크기 초과: {file_size} bytes")
                return jsonify({'error': '파일 크기가 λ„ˆλ¬΄ ν½λ‹ˆλ‹€. μ΅œλŒ€ 100MBκΉŒμ§€ μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.'}), 400
            # NOTE(review): unreachable inside the `file_size > 0` branch;
            # empty files are actually rejected via saved_file_size below.
            # Kept as-is to avoid rejecting unknown-size uploads (sentinel 0).
            if file_size == 0:
                print("빈 파일 μ—…λ‘œλ“œ μ‹œλ„")
                return jsonify({'error': '빈 νŒŒμΌμ€ μ—…λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€.'}), 400
        # Build a collision-proof stored name that keeps the sanitized
        # original filename (and therefore its extension).
        original_filename = file.filename
        filename = secure_filename(original_filename)
        if not filename:
            return jsonify({'error': 'μœ νš¨ν•˜μ§€ μ•Šμ€ 파일λͺ…μž…λ‹ˆλ‹€.'}), 400
        # Fix: interpolate the sanitized filename into the stored name.
        unique_filename = f"{uuid.uuid4().hex}_{filename}"
        file_path = os.path.join(UPLOAD_FOLDER, unique_filename)
        # Save the file to disk, reporting the failure class in 'step'.
        try:
            log_print(f"[6/8] 파일 μ €μž₯ μ‹œλ„: {file_path}")
            file.save(file_path)
            log_print(f"[6/8] 파일 μ €μž₯ μ™„λ£Œ: {file_path}")
        except IOError as e:
            error_msg = f'파일 μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ IOError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500
        except PermissionError as e:
            error_msg = f'파일 μ €μž₯ κΆŒν•œ 였λ₯˜: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ PermissionError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save_permission'}), 500
        except Exception as e:
            error_msg = f'파일 μ €μž₯ μ‹€νŒ¨: {str(e)}'
            log_print(f"[ERROR] 파일 μ €μž₯ Exception: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500
        # Post-save verification: the file must exist and be non-empty.
        if not os.path.exists(file_path):
            error_msg = '파일이 μ €μž₯λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
            print(f"파일 쑴재 확인 μ‹€νŒ¨: {file_path}")
            return jsonify({'error': error_msg}), 500
        saved_file_size = os.path.getsize(file_path)
        if saved_file_size == 0:
            os.remove(file_path)  # drop the empty file
            error_msg = '파일이 μ œλŒ€λ‘œ μ €μž₯λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
            print(f"빈 파일 μ‚­μ œ: {file_path}")
            return jsonify({'error': error_msg}), 500
        print(f"μ €μž₯된 파일 크기: {saved_file_size} bytes")
        # Record the upload in the database and build the RAG chunks.
        try:
            log_print(f"[7/8] λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ μ‹œλ„: {original_filename}")
            uploaded_file = UploadedFile(
                filename=unique_filename,
                original_filename=original_filename,
                file_path=file_path,
                file_size=saved_file_size,
                model_name=model_name,  # already validated above
                uploaded_by=current_user.id,
                parent_file_id=parent_file_id if parent_file else None  # continuation uploads only
            )
            db.session.add(uploaded_file)
            db.session.flush()  # flush to obtain the new row's ID
            log_print(f"[7/8] λ°μ΄ν„°λ² μ΄μŠ€ flush μ™„λ£Œ, 파일 ID: {uploaded_file.id}")
            # Text files are chunked for RAG retrieval.
            if original_filename.lower().endswith(('.txt', '.md')):
                try:
                    log_print(f"[7/8] 청크 생성 μ‹œμž‘: {original_filename}")
                    log_print(f"[7/8] 파일 ID: {uploaded_file.id}")
                    # Read the content: UTF-8 first, CP949 fallback.
                    encoding = 'utf-8'
                    try:
                        with open(file_path, 'r', encoding=encoding) as f:
                            content = f.read()
                        log_print(f"[7/8] UTF-8 μΈμ½”λ”©μœΌλ‘œ 파일 읽기 성곡: {len(content)}자")
                    except UnicodeDecodeError:
                        log_print(f"[7/8] UTF-8 인코딩 μ‹€νŒ¨, CP949 μ‹œλ„: {original_filename}")
                        with open(file_path, 'r', encoding='cp949') as f:
                            content = f.read()
                        log_print(f"[7/8] CP949 μΈμ½”λ”©μœΌλ‘œ 파일 읽기 성곡: {len(content)}자")
                    # Create and store the child chunks.
                    log_print(f"[7/8] 청크 생성 ν•¨μˆ˜ 호좜 쀑...")
                    chunk_count = create_chunks_for_file(uploaded_file.id, content)
                    if chunk_count > 0:
                        log_print(f"[7/8] βœ… 성곡: 파일 {original_filename}을 {chunk_count}개의 청크둜 λΆ„ν• ν–ˆμŠ΅λ‹ˆλ‹€.")
                        print(f"파일 {original_filename}을 {chunk_count}개의 청크둜 λΆ„ν• ν–ˆμŠ΅λ‹ˆλ‹€.")
                    else:
                        log_print(f"[7/8] ⚠️ κ²½κ³ : 청크가 μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€. (파일이 λ„ˆλ¬΄ μ§§κ±°λ‚˜ λΉ„μ–΄μžˆμ„ 수 μžˆμŠ΅λ‹ˆλ‹€.)")
                        print(f"κ²½κ³ : 파일 {original_filename}에 λŒ€ν•œ 청크가 μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
                    # Create the parent chunk via AI analysis.
                    log_print(f"[7/9] Parent Chunk 생성 μ‹œμž‘ (AI 뢄석)...")
                    parent_chunk = create_parent_chunk_with_ai(uploaded_file.id, content, model_name)
                    if parent_chunk:
                        log_print(f"[7/9] βœ… Parent Chunk 생성 μ™„λ£Œ: {original_filename}")
                        print(f"Parent Chunkκ°€ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€: {original_filename}")
                    else:
                        log_print(f"[7/9] ⚠️ κ²½κ³ : Parent Chunk 생성 μ‹€νŒ¨: {original_filename}")
                        print(f"κ²½κ³ : Parent Chunk 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€: {original_filename}")
                except Exception as e:
                    # Chunking failures are logged but do not abort the upload.
                    error_msg = f"청크 생성 쀑 였λ₯˜: {str(e)}"
                    log_print(f"[7/8] ❌ 였λ₯˜: {error_msg}")
                    print(error_msg)
                    import traceback
                    traceback.print_exc()
            # Final chunk count for the response payload.
            chunk_count = 0
            if original_filename.lower().endswith(('.txt', '.md')):
                chunk_count = DocumentChunk.query.filter_by(file_id=uploaded_file.id).count()
                log_print(f"[8/8] μ΅œμ’… 청크 개수 확인: {chunk_count}개")
            db.session.commit()
            log_print(f"[8/8] λ°μ΄ν„°λ² μ΄μŠ€ 컀밋 μ™„λ£Œ: {original_filename}")
            log_print(f"[8/8] μ—°κ²°λœ λͺ¨λΈ: {model_name}")
            log_print(f"[8/8] μƒμ„±λœ 청크 수: {chunk_count}")
            # Learning-status summary.
            if chunk_count > 0:
                log_print(f"[8/8] βœ… AI ν•™μŠ΅ μ€€λΉ„ μ™„λ£Œ: {chunk_count}개 청크가 μ €μž₯λ˜μ–΄ RAG 검색에 μ‚¬μš© κ°€λŠ₯ν•©λ‹ˆλ‹€.")
            else:
                log_print(f"[8/8] ⚠️ κ²½κ³ : 청크가 μƒμ„±λ˜μ§€ μ•Šμ•„ RAG 검색이 λΆˆκ°€λŠ₯ν•©λ‹ˆλ‹€.")
            log_print(f"{'='*60}")
            log_print(f"=== 파일 μ—…λ‘œλ“œ 성곡 ===")
            log_print(f"{'='*60}\n")
        except Exception as e:
            db.session.rollback()
            error_msg = f'λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'
            log_print(f"[ERROR] λ°μ΄ν„°λ² μ΄μŠ€ μ €μž₯ 였λ₯˜: {error_msg}")
            traceback.print_exc()
            # Remove the orphaned file when the DB record could not be created.
            if os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    log_print(f"였λ₯˜λ‘œ μΈν•œ 파일 μ‚­μ œ: {file_path}")
                except Exception as del_e:
                    log_print(f"파일 μ‚­μ œ μ‹€νŒ¨: {str(del_e)}")
            return jsonify({'error': error_msg, 'step': 'database_save'}), 500
        log_print(f"[8/8] μ—…λ‘œλ“œ μ™„λ£Œ - 파일: {original_filename}, λͺ¨λΈ: {model_name}, 크기: {saved_file_size} bytes")
        return jsonify({
            'message': f'파일이 μ„±κ³΅μ μœΌλ‘œ μ—…λ‘œλ“œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (λͺ¨λΈ: {model_name})',
            'file': uploaded_file.to_dict(),
            'model_name': model_name,
            'chunk_count': chunk_count if 'chunk_count' in locals() else 0
        }), 200
    except Exception as e:
        db.session.rollback()
        error_msg = str(e)
        error_type = type(e).__name__
        log_print(f"\n{'='*60}")
        log_print(f"=== μ—…λ‘œλ“œ 처리 쀑 μ˜ˆμ™Έ λ°œμƒ ===")
        log_print(f"μ˜ˆμ™Έ νƒ€μž…: {error_type}")
        log_print(f"μ—λŸ¬ λ©”μ‹œμ§€: {error_msg}")
        traceback.print_exc()
        log_print(f"{'='*60}\n")
        # Map oversized-request failures to a 413 with a friendly message.
        if '413' in error_msg or 'Request Entity Too Large' in error_msg or error_type == 'RequestEntityTooLarge':
            return jsonify({'error': '파일 크기가 λ„ˆλ¬΄ ν½λ‹ˆλ‹€. μ΅œλŒ€ 100MBκΉŒμ§€ μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€.', 'step': 'file_size'}), 413
        return jsonify({'error': f'파일 μ—…λ‘œλ“œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {error_type}: {error_msg}', 'step': 'exception'}), 500
@main_bp.route('/api/files', methods=['GET'])
@login_required
def get_files():
    """List uploaded root files (with their continuation uploads) plus per-model stats."""
    try:
        model_name = request.args.get('model_name', None)
        # Root files only: continuation uploads carry a parent_file_id.
        query = UploadedFile.query.filter_by(parent_file_id=None)
        if model_name:
            query = query.filter_by(model_name=model_name)
            print(f"[파일 쑰회] λͺ¨λΈ '{model_name}' 필터링")
        files = query.order_by(UploadedFile.uploaded_at.desc()).all()

        def describe(entry):
            # Serialize one file record together with its chunk count.
            info = entry.to_dict()
            info['chunk_count'] = DocumentChunk.query.filter_by(file_id=entry.id).count()
            return info

        files_with_children = []
        for file in files:
            file_dict = describe(file)
            # Attach the file's continuation uploads, oldest first.
            children = UploadedFile.query.filter_by(parent_file_id=file.id).order_by(UploadedFile.uploaded_at.asc()).all()
            file_dict['child_files'] = [describe(child) for child in children]
            files_with_children.append(file_dict)

        # Per-model statistics, counting root files only.
        model_stats = {}
        if model_name:
            # Stats restricted to the requested model.
            model_stats[model_name] = {
                'count': len(files),
                'total_size': sum(f.file_size for f in files)
            }
        else:
            # Stats across every model.
            for file in UploadedFile.query.filter_by(parent_file_id=None).all():
                bucket = model_stats.setdefault(file.model_name or 'λ―Έμ§€μ •', {'count': 0, 'total_size': 0})
                bucket['count'] += 1
                bucket['total_size'] += file.file_size
        print(f"[파일 쑰회] 쑰회된 원본 파일 수: {len(files)}개")
        return jsonify({
            'files': files_with_children,
            'model_stats': model_stats,
            'filtered_model': model_name
        }), 200
    except Exception as e:
        return jsonify({'error': f'파일 λͺ©λ‘ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/chunks', methods=['GET'])
@login_required
def get_file_chunks(file_id):
    """Report chunking status of one of the user's files (RAG readiness check)."""
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all()
        total_chunks = len(chunks)
        # Preview only the first three chunks.
        sample_chunks = [
            {
                'index': chunk.chunk_index,
                'content_preview': chunk.content[:100] + '...' if len(chunk.content) > 100 else chunk.content,
                'content_length': len(chunk.content)
            }
            for chunk in chunks[:3]
        ]
        ready = total_chunks > 0
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'model_name': file.model_name,
            'total_chunks': total_chunks,
            'sample_chunks': sample_chunks,
            'learning_status': 'ready' if ready else 'not_ready',
            'message': f'{total_chunks}개 청크가 μ €μž₯λ˜μ–΄ RAG 검색에 μ‚¬μš© κ°€λŠ₯ν•©λ‹ˆλ‹€.' if ready else '청크가 μƒμ„±λ˜μ§€ μ•Šμ•„ RAG 검색이 λΆˆκ°€λŠ₯ν•©λ‹ˆλ‹€.'
        }), 200
    except Exception as e:
        return jsonify({'error': f'청크 정보 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/parent-chunk', methods=['GET'])
@login_required
def get_file_parent_chunk(file_id):
    """Return the AI-generated Parent Chunk (overview) for one of the user's files."""
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        payload = {'file_id': file_id, 'filename': file.original_filename}
        if parent_chunk is None:
            payload['has_parent_chunk'] = False
            payload['message'] = 'Parent Chunkκ°€ μƒμ„±λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.'
        else:
            payload['has_parent_chunk'] = True
            payload['parent_chunk'] = parent_chunk.to_dict()
            payload['message'] = 'Parent Chunkκ°€ μ‘΄μž¬ν•©λ‹ˆλ‹€.'
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'Parent Chunk 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>', methods=['DELETE'])
@login_required
def delete_file(file_id):
    """Delete an uploaded file together with every related file.

    Deleting a root file also removes all of its continuation uploads;
    deleting a continuation upload removes the root file and every sibling.
    For each record, the on-disk file, its DocumentChunks (DB and vector
    DB) and its ParentChunk are removed before the row itself. Failures on
    individual records are logged and skipped; one commit covers the batch.
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        # Build the full set of records to delete, depending on whether the
        # target is a root file or a continuation upload.
        files_to_delete = []
        if file.parent_file_id is None:
            # Root file: also delete every continuation upload chained to it.
            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
            files_to_delete = [file] + child_files
            print(f"[파일 μ‚­μ œ] 원본 파일 μ‚­μ œ: {file.original_filename}, μ—°κ΄€ 파일 {len(child_files)}κ°œλ„ ν•¨κ»˜ μ‚­μ œ")
        else:
            # Continuation upload: delete the root file and all siblings too.
            parent_file = UploadedFile.query.get(file.parent_file_id)
            if parent_file:
                all_child_files = UploadedFile.query.filter_by(parent_file_id=file.parent_file_id).all()
                files_to_delete = [parent_file] + all_child_files
                print(f"[파일 μ‚­μ œ] μ΄μ–΄μ„œ μ—…λ‘œλ“œλœ 파일 μ‚­μ œ: {file.original_filename}, 원본 및 μ—°κ΄€ 파일 {len(all_child_files)}κ°œλ„ ν•¨κ»˜ μ‚­μ œ")
            else:
                # Orphaned continuation upload: delete just this record.
                files_to_delete = [file]
        deleted_count = 0
        deleted_files = []
        for file_to_delete in files_to_delete:
            try:
                # Remove the file from disk (skip if already gone).
                if os.path.exists(file_to_delete.file_path):
                    os.remove(file_to_delete.file_path)
                    print(f"[파일 μ‚­μ œ] 파일 μ‹œμŠ€ν…œμ—μ„œ μ‚­μ œ: {file_to_delete.file_path}")
                # Delete the related child chunks (DocumentChunk rows).
                child_chunk_count = DocumentChunk.query.filter_by(file_id=file_to_delete.id).count()
                if child_chunk_count > 0:
                    DocumentChunk.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[파일 μ‚­μ œ] Child Chunk {child_chunk_count}개 μ‚­μ œ μ™„λ£Œ")
                # Also remove the file's chunks from the vector DB; failures
                # here are deliberately ignored.
                try:
                    vector_db = get_vector_db()
                    vector_db.delete_chunks_by_file_id(file_to_delete.id)
                    print(f"[파일 μ‚­μ œ] 벑터 DBμ—μ„œ 청크 μ‚­μ œ μ™„λ£Œ")
                except Exception as vector_e:
                    print(f"[파일 μ‚­μ œ] 벑터 DB μ‚­μ œ 였λ₯˜ (λ¬΄μ‹œ): {str(vector_e)}")
                # Delete the related Parent Chunk, if any.
                parent_chunk = ParentChunk.query.filter_by(file_id=file_to_delete.id).first()
                if parent_chunk:
                    db.session.delete(parent_chunk)
                    print(f"[파일 μ‚­μ œ] Parent Chunk μ‚­μ œ μ™„λ£Œ")
                deleted_files.append(file_to_delete.original_filename)
                db.session.delete(file_to_delete)
                deleted_count += 1
                print(f"[파일 μ‚­μ œ] λ°μ΄ν„°λ² μ΄μŠ€μ—μ„œ 파일 μ‚­μ œ μ™„λ£Œ: {file_to_delete.original_filename}")
            except Exception as e:
                # Keep going: a failure on one record must not block the rest.
                print(f"[파일 μ‚­μ œ 였λ₯˜] {file_to_delete.original_filename}: {str(e)}")
                import traceback
                traceback.print_exc()
        db.session.commit()
        message = f'파일이 μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'
        if deleted_count > 1:
            message = f'파일 {deleted_count}κ°œκ°€ μ„±κ³΅μ μœΌλ‘œ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€. (원본 및 μ—°κ΄€ 파일 포함)'
        return jsonify({
            'message': message,
            'deleted_count': deleted_count,
            'deleted_files': deleted_files
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'파일 μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/files/<int:file_id>/content', methods=['GET'])
@login_required
def get_file_content(file_id):
    """Return the raw text content of an uploaded file."""
    try:
        file = UploadedFile.query.get_or_404(file_id)
        if not os.path.exists(file.file_path):
            return jsonify({'error': 'νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        # Try UTF-8 first; fall back to CP949 for legacy Korean text files.
        try:
            with open(file.file_path, 'r', encoding='utf-8') as handle:
                content = handle.read()
        except UnicodeDecodeError:
            with open(file.file_path, 'r', encoding='cp949') as handle:
                content = handle.read()
        return jsonify({'content': content, 'filename': file.original_filename}), 200
    except Exception as e:
        return jsonify({'error': f'파일 λ‚΄μš© 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions', methods=['GET'])
@login_required
def get_chat_sessions():
    """List the current user's chat sessions (newest first, capped at 20)."""
    try:
        recent = (
            ChatSession.query
            .filter_by(user_id=current_user.id)
            .order_by(ChatSession.updated_at.desc())
            .limit(20)
            .all()
        )
        payload = {'sessions': [s.to_dict() for s in recent]}
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions', methods=['POST'])
@login_required
def create_chat_session():
    """Create a new chat session for the current user.

    Optional JSON body: {'title': str, 'model_name': str}.
    Returns the created session as JSON.
    """
    try:
        # get_json(silent=True) yields None instead of raising when the
        # request carries no/invalid JSON, so a bare POST still works
        # (request.json would have produced a 500 here).
        data = request.get_json(silent=True) or {}
        title = data.get('title', 'μƒˆ λŒ€ν™”')
        model_name = data.get('model_name')
        session = ChatSession(
            user_id=current_user.id,
            title=title,
            model_name=model_name
        )
        db.session.add(session)
        db.session.commit()
        return jsonify({
            'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'session': session.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['GET'])
@login_required
def get_chat_session(session_id):
    """Return one of the current user's chat sessions, messages included."""
    try:
        # first() + explicit JSON 404: first_or_404 raises NotFound, which the
        # broad except below would otherwise turn into a 500.
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first()
        if session is None:
            return jsonify({'error': 'λŒ€ν™” μ„Έμ…˜μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        session_dict = session.to_dict()
        session_dict['messages'] = [msg.to_dict() for msg in session.messages]
        return jsonify({'session': session_dict}), 200
    except Exception as e:
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ 쑰회 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['PUT'])
@login_required
def update_chat_session(session_id):
    """Update a chat session owned by the current user (currently: title).

    Optional JSON body: {'title': str}. Returns the updated session.
    """
    try:
        # first() + explicit JSON 404: first_or_404 raises NotFound, which the
        # broad except below would otherwise turn into a 500.
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first()
        if session is None:
            return jsonify({'error': 'λŒ€ν™” μ„Έμ…˜μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        # Tolerate a missing/non-JSON body instead of crashing on None.
        data = request.get_json(silent=True) or {}
        if 'title' in data:
            session.title = data['title']
        session.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μˆ˜μ •λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'session': session.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ μˆ˜μ • 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>', methods=['DELETE'])
@login_required
def delete_chat_session(session_id):
    """Delete a chat session owned by the current user."""
    try:
        # first() + explicit JSON 404: first_or_404 raises NotFound, which the
        # broad except below would otherwise turn into a 500.
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first()
        if session is None:
            return jsonify({'error': 'λŒ€ν™” μ„Έμ…˜μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        db.session.delete(session)
        db.session.commit()
        return jsonify({'message': 'λŒ€ν™” μ„Έμ…˜μ΄ μ‚­μ œλ˜μ—ˆμŠ΅λ‹ˆλ‹€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λŒ€ν™” μ„Έμ…˜ μ‚­μ œ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500
@main_bp.route('/api/chat/sessions/<int:session_id>/messages', methods=['POST'])
@login_required
def add_chat_message(session_id):
    """Append a message to one of the current user's chat sessions.

    JSON body: {'role': str (default 'user'), 'content': str (required)}.
    The first user message of an untitled session also becomes the session
    title (truncated to 30 characters).
    """
    try:
        # first() + explicit JSON 404: first_or_404 raises NotFound, which the
        # broad except below would otherwise turn into a 500.
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first()
        if session is None:
            return jsonify({'error': 'λŒ€ν™” μ„Έμ…˜μ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€.'}), 404
        # Tolerate a missing/non-JSON body instead of crashing on None.
        data = request.get_json(silent=True) or {}
        role = data.get('role', 'user')
        content = data.get('content', '')
        if not content:
            return jsonify({'error': 'λ©”μ‹œμ§€ λ‚΄μš©μ΄ ν•„μš”ν•©λ‹ˆλ‹€.'}), 400
        message = ChatMessage(
            session_id=session_id,
            role=role,
            content=content
        )
        db.session.add(message)
        # Derive a title from the first user message of an untitled session.
        if role == 'user' and (not session.title or session.title == 'μƒˆ λŒ€ν™”'):
            session.title = content[:30] + '...' if len(content) > 30 else content
        session.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λ©”μ‹œμ§€κ°€ μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.',
            'chat_message': message.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λ©”μ‹œμ§€ μΆ”κ°€ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {str(e)}'}), 500