SOY NV AI (commit ead5574)
Improve Ollama error handling and connection checking
- Add Ollama server connection check before API calls
- Improve error messages for better debugging
- Add detailed error logging for Ollama communication errors
from flask import Blueprint, render_template, request, jsonify, send_from_directory, redirect, url_for, flash
from flask_login import login_user, logout_user, login_required, current_user
from werkzeug.utils import secure_filename
from app.database import db, UploadedFile, User, ChatSession, ChatMessage, DocumentChunk, ParentChunk, SystemConfig, EpisodeAnalysis, GraphEntity, GraphRelationship, GraphEvent
from app.vector_db import get_vector_db
from app.gemini_client import get_gemini_client
import requests
import os
from datetime import datetime
import uuid
import re
import json

main_bp = Blueprint('main', __name__)
def admin_required(f):
    """Decorator that requires administrator privileges."""
    from functools import wraps
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not current_user.is_admin:
            # Return a JSON response for API requests
            if request.path.startswith('/api/'):
                return jsonify({'error': 'Administrator privileges are required.'}), 403
            flash('Administrator privileges are required.', 'error')
            return redirect(url_for('main.index'))
        return f(*args, **kwargs)
    return decorated_function

# Ollama base URL (configurable via the OLLAMA_BASE_URL environment variable)
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
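# NOTE: illustrative sketch, not part of the original file. The commit message
# mentions adding an Ollama server connection check before API calls; a minimal
# version of such a check could look like this (GET /api/tags is Ollama's
# "list local models" endpoint, so it doubles as a cheap liveness probe).
def _check_ollama_connection(timeout=5):
    """Return (ok, error_message) for the configured Ollama server."""
    try:
        resp = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=timeout)
        if resp.status_code == 200:
            return True, None
        return False, f'Ollama server returned HTTP {resp.status_code}'
    except requests.exceptions.Timeout:
        return False, f'Ollama server at {OLLAMA_BASE_URL} did not respond within {timeout} seconds'
    except requests.exceptions.ConnectionError:
        return False, f'Cannot connect to the Ollama server at {OLLAMA_BASE_URL}'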
def get_model_token_limit(model_name, default_tokens=2000, token_type='output'):
    """Get the configured token limit for a model (defaults to the output token limit for backward compatibility).

    Args:
        model_name: AI model name (e.g. "gemini-2.0-flash-exp", "gemini:gemini-2.0-flash-exp", "gemma2:9b")
        default_tokens: default token count (used when no setting exists)
        token_type: 'input' or 'output' (default: 'output')

    Returns:
        Token count (int)
    """
    return get_model_token_limit_by_type(model_name, default_tokens, token_type)

def get_model_token_limit_by_type(model_name, default_tokens=2000, token_type='output'):
    """Get the configured token limit for a model, by type (input / output / parent chunk).

    Args:
        model_name: AI model name (e.g. "gemini-2.0-flash-exp", "gemini:gemini-2.0-flash-exp", "gemma2:9b")
        default_tokens: default token count (used when no setting exists)
        token_type: 'input', 'output', or 'parent_chunk'

    Returns:
        Token count (int)
    """
    if not model_name:
        return default_tokens
    try:
        from app.database import SystemConfig
        # Try the model name in several forms:
        # 1. the original model name as-is
        # 2. for Gemini models, with the "gemini:" prefix added or removed
        # 3. for Ollama models, as-is
        model_name_clean = model_name.strip()
        possible_keys = [model_name_clean]
        # Gemini model handling
        if model_name_clean.startswith('gemini:'):
            # "gemini:gemini-2.0-flash-exp" -> also try "gemini-2.0-flash-exp" (prefix removed)
            possible_keys.append(model_name_clean.replace('gemini:', '', 1))
        elif model_name_clean.startswith('gemini-'):
            # "gemini-2.0-flash-exp" -> also try "gemini:gemini-2.0-flash-exp" (prefix added)
            possible_keys.append(f'gemini:{model_name_clean}')
        # Try each candidate key
        for key in possible_keys:
            # New format: model_token_input_{model_name}, model_token_output_{model_name}, model_token_parent_chunk_{model_name}
            config_key = f"model_token_{token_type}_{key}"
            token_value = SystemConfig.get_config(config_key)
            if token_value:
                try:
                    token_int = int(token_value)
                    print(f"[get_model_token_limit_by_type] Using {token_type} token limit {token_int} for model '{model_name}' (key: {config_key})")
                    return token_int
                except (ValueError, TypeError):
                    continue
            # Backward compatibility: also check the legacy key model_token_{model_name} (treated as the output limit)
            if token_type == 'output':
                old_config_key = f"model_token_{key}"
                token_value = SystemConfig.get_config(old_config_key)
                if token_value:
                    try:
                        token_int = int(token_value)
                        print(f"[get_model_token_limit_by_type] Using output token limit {token_int} for model '{model_name}' (legacy key: {old_config_key})")
                        return token_int
                    except (ValueError, TypeError):
                        continue
        # No setting found: fall back to the default
        print(f"[get_model_token_limit_by_type] No {token_type} token setting for model '{model_name}', using default {default_tokens}")
    except Exception as e:
        print(f"[get_model_token_limit_by_type] Error: {e}")
    return default_tokens
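# Example (illustrative, assuming SystemConfig rows exist): with entries such as
#   model_token_input_gemma2:9b  = "8192"
#   model_token_output_gemma2:9b = "2000"
# the calls below resolve to those values, falling back to the defaults when no
# matching key (or legacy model_token_gemma2:9b key) is found:
#   get_model_token_limit_by_type("gemma2:9b", 100000, 'input')  # -> 8192
#   get_model_token_limit_by_type("gemma2:9b", 2000, 'output')   # -> 2000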
# Upload settings
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'uploads')
ALLOWED_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'epub'}

# Log the upload folder path (for debugging)
print(f"[Upload settings] Upload folder path: {UPLOAD_FOLDER}")
print(f"[Upload settings] Upload folder exists: {os.path.exists(UPLOAD_FOLDER)}")

def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def ensure_upload_folder():
    """Create the upload folder if it does not exist."""
    try:
        if not os.path.exists(UPLOAD_FOLDER):
            print(f"Creating upload folder: {UPLOAD_FOLDER}")
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            if not os.path.exists(UPLOAD_FOLDER):
                raise Exception(f'Could not create the upload folder: {UPLOAD_FOLDER}')
        # Check write permission on the folder
        test_file = os.path.join(UPLOAD_FOLDER, '.write_test')
        try:
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"Upload folder write permission verified: {UPLOAD_FOLDER}")
        except PermissionError as e:
            raise Exception(f'No write permission on the upload folder: {UPLOAD_FOLDER} - {str(e)}')
        except Exception as e:
            raise Exception(f'Upload folder write test failed: {UPLOAD_FOLDER} - {str(e)}')
    except Exception as e:
        print(f"Upload folder creation error: {str(e)}")
        import traceback
        traceback.print_exc()
        raise
def split_text_into_chunks(text, min_chunk_size=200, max_chunk_size=1000, overlap=150):
    """Semantic text chunking (split while respecting sentence and paragraph boundaries)."""
    if not text or len(text.strip()) == 0:
        return []
    # Step 1: split into paragraphs (on blank lines)
    paragraphs = re.split(r'\n\s*\n', text.strip())
    paragraphs = [p.strip() for p in paragraphs if p.strip()]
    if not paragraphs:
        return []
    # Step 2: split each paragraph into sentences
    # Sentence terminators: . ! ? (covers both Korean and English text)
    # A terminator followed by whitespace or end-of-string is treated as the end of a sentence
    sentence_pattern = r'([.!?]+)(?=\s+|$)'
    all_sentences = []
    for para in paragraphs:
        # Split into sentences (keeping the punctuation)
        parts = re.split(sentence_pattern, para)
        combined_sentences = []
        current_sentence = ""
        for i, part in enumerate(parts):
            if part.strip():
                if re.match(r'^[.!?]+$', part):
                    # Punctuation: append it to the current sentence and close the sentence
                    current_sentence += part
                    if current_sentence.strip():
                        combined_sentences.append(current_sentence.strip())
                    current_sentence = ""
                else:
                    # Plain text: append it to the current sentence
                    current_sentence += part
        # Handle the last sentence (no trailing punctuation)
        if current_sentence.strip():
            combined_sentences.append(current_sentence.strip())
        # Paragraph with no detected sentences (no punctuation at all)
        if not combined_sentences and para.strip():
            combined_sentences.append(para.strip())
        all_sentences.extend(combined_sentences)
    if not all_sentences:
        # Sentence splitting failed: return the original text as a single chunk
        return [text] if text.strip() else []
    # Step 3: group the sentences into semantically coherent chunks
    chunks = []
    current_chunk = []
    current_size = 0
    for sentence in all_sentences:
        sentence_size = len(sentence)
        # Adding this sentence would exceed the maximum chunk size
        if current_size + sentence_size > max_chunk_size and current_chunk:
            # Save the current chunk (preserving line breaks)
            chunk_text = '\n'.join(current_chunk)
            if len(chunk_text.strip()) >= min_chunk_size:
                chunks.append(chunk_text)
            else:
                # Below the minimum size: merge into the previous chunk (overlap effect)
                if chunks:
                    chunks[-1] = chunks[-1] + '\n' + chunk_text
                else:
                    chunks.append(chunk_text)
            # Keep trailing sentences for overlap (carry the last few sentences into the next chunk)
            overlap_sentences = []
            overlap_size = 0
            for s in reversed(current_chunk):
                if overlap_size + len(s) <= overlap:
                    overlap_sentences.insert(0, s)
                    overlap_size += len(s) + 1  # including the line break
                else:
                    break
            current_chunk = overlap_sentences + [sentence]
            current_size = overlap_size + sentence_size
        else:
            # Add the sentence to the current chunk
            current_chunk.append(sentence)
            current_size += sentence_size + 1  # including the line break
    # Add the final chunk
    if current_chunk:
        chunk_text = '\n'.join(current_chunk)
        if chunks and len(chunk_text.strip()) < min_chunk_size:
            # Below the minimum size: merge into the previous chunk
            chunks[-1] = chunks[-1] + '\n' + chunk_text
        else:
            chunks.append(chunk_text)
    # Drop empty chunks and handle chunks below the minimum size
    final_chunks = []
    for chunk in chunks:
        chunk = chunk.strip()
        if chunk and len(chunk) >= min_chunk_size:
            final_chunks.append(chunk)
        elif chunk:
            # Merge undersized chunks into the previous chunk
            if final_chunks:
                final_chunks[-1] = final_chunks[-1] + '\n' + chunk
            else:
                final_chunks.append(chunk)
    return final_chunks if final_chunks else [text] if text.strip() else []
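# Illustrative usage (not part of the original file): the chunker splits on
# paragraph/sentence boundaries and carries roughly `overlap` characters of the
# previous chunk into the next one.
def _example_chunking():
    sample_text = (
        "First paragraph. It has two sentences.\n\n"
        "Second paragraph! A longer body of text would be split into "
        "several overlapping chunks."
    )
    for idx, piece in enumerate(split_text_into_chunks(sample_text, min_chunk_size=10, max_chunk_size=60, overlap=20)):
        print(idx, len(piece), piece[:40])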
def extract_chapter_number(text):
    """Extract a chapter number from text."""
    # Match a variety of chapter heading patterns
    patterns = [
        r'제\s*(\d+)\s*장',      # 제1장, 제1 장
        r'제\s*(\d+)\s*화',      # 제1화
        r'Chapter\s*(\d+)',      # Chapter 1
        r'CHAPTER\s*(\d+)',      # CHAPTER 1
        r'Ch\.\s*(\d+)',         # Ch. 1
        r'(\d+)\s*장',           # 1장
        r'(\d+)\s*화',           # 1화
        r'chap\.\s*(\d+)',       # chap. 1
        r'ch\s*(\d+)',           # ch 1
        r'(\d+)\s*章',           # 1章
    ]
    # Only scan the first 500 characters (chapter headings usually appear at the top)
    search_text = text[:500]
    for pattern in patterns:
        match = re.search(pattern, search_text, re.IGNORECASE)
        if match:
            try:
                chapter_num = int(match.group(1))
                return chapter_num
            except (ValueError, IndexError):
                continue
    return None
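# Examples (illustrative): headings the patterns above are meant to match.
#   extract_chapter_number("제3장 어둠 속에서")          # -> 3
#   extract_chapter_number("Chapter 12 - The Return")   # -> 12
#   extract_chapter_number("plain text with no heading") # -> None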
def split_content_by_episodes(content):
    """Split the original web novel into sections marked #작품설명, #1화, #2화, etc.

    Returns:
        list: [(section_type, section_title, section_content, metadata), ...]
        section_type: '작품설명' or '화'
        section_title: '작품설명' or '1화', '2화', ...
        metadata: {'chapter': '#작품설명'} or {'chapter': '1화'}
    """
    if not content or len(content.strip()) == 0:
        return []
    sections = []
    # Look for markers such as #작품설명, #1화, #2화
    # Pattern: #작품설명, #1화, #2화, #10화, ...
    episode_pattern = r'^#\s*(작품설명|\d+화)'
    lines = content.split('\n')
    current_section_type = None
    current_section_title = None
    current_section_content = []
    current_section_start_line = 0
    for i, line in enumerate(lines):
        # Look for a #작품설명 or #n화 marker at the start of the line
        match = re.match(episode_pattern, line.strip())
        if match:
            # Save the previous section
            if current_section_type and current_section_content:
                section_content = '\n'.join(current_section_content).strip()
                if section_content:
                    # Build the metadata
                    if current_section_type == '작품설명':
                        metadata = {'chapter': '#작품설명'}
                    else:
                        metadata = {'chapter': current_section_title}
                    sections.append((
                        current_section_type,
                        current_section_title,
                        section_content,
                        metadata
                    ))
            # Start a new section
            section_title = match.group(1)
            if section_title == '작품설명':
                current_section_type = '작품설명'
                current_section_title = '작품설명'
            else:
                current_section_type = '화'
                current_section_title = section_title  # '1화', '2화', ...
            current_section_content = [line]  # keep the header line
            current_section_start_line = i
        else:
            # Append the line to the current section
            if current_section_content is not None:
                current_section_content.append(line)
    # Save the last section
    if current_section_type and current_section_content:
        section_content = '\n'.join(current_section_content).strip()
        if section_content:
            # Build the metadata
            if current_section_type == '작품설명':
                metadata = {'chapter': '#작품설명'}
            else:
                metadata = {'chapter': current_section_title}
            sections.append((
                current_section_type,
                current_section_title,
                section_content,
                metadata
            ))
    # If no markers were found, treat the whole text as a single section
    if not sections:
        sections.append((
            '기타',
            '전체',
            content.strip(),
            {'chapter': None}
        ))
    return sections
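# Illustrative usage (not part of the original file): a document with
# "#작품설명" and "#1화" / "#2화" markers is split into one section per marker.
def _example_episode_split():
    sample = (
        "#작품설명\nSynopsis text...\n\n"
        "#1화\nBody of episode one...\n\n"
        "#2화\nBody of episode two..."
    )
    for section_type, title, body, meta in split_content_by_episodes(sample):
        print(section_type, title, meta, len(body))
    # Expected section titles: 작품설명, 1화, 2화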
def extract_metadata_with_ai(chunk_content, full_content=None, parent_chunk=None, model_name=None):
    """Use AI to extract chunk metadata (point of view, characters, temporal setting, character relationships).

    Args:
        chunk_content: chunk content to analyze
        full_content: full original novel content (used to infer character relationships)
        parent_chunk: ParentChunk object (optional)
        model_name: AI model name to use
    """
    try:
        # Use the full novel content as reference when inferring character relationships
        full_content_preview = ""
        if full_content:
            # If the full content is too long, use only the beginning and end (max 20000 chars)
            if len(full_content) > 20000:
                full_content_preview = full_content[:10000] + "\n... (middle omitted) ...\n" + full_content[-10000:]
            else:
                full_content_preview = full_content
        # Build the prompt
        prompt = f"""Analyze the following web novel text and respond only with the information below, in JSON format.

Full original novel content (for reference):
{full_content_preview[:50000] if full_content_preview else "None"}

Chunk text to analyze:
{chunk_content[:2000]}

Respond only in the following JSON format:
{{
    "pov": "describe the point of view (e.g. first-person protagonist, third-person omniscient narrator, etc.)",
    "characters": ["character1", "character2"],
    "time_background": "description of the temporal setting (e.g. past recollection, present time, future, etc.)",
    "character_relationships": [
        {{
            "character1": "person1",
            "character2": "person2",
            "relationship": "their relationship at this point in the story (e.g. lovers, enemies, friends, family, etc.)"
        }}
    ]
}}

character_relationships should describe the current relationships between the characters appearing in this chunk, using the full novel content as reference.
Respond with JSON only and do not include any other explanation."""
        # If no model name is given, fall back to the default (try Gemini first)
        if not model_name:
            # Try Gemini
            try:
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name="gemini-1.5-flash",
                        temperature=0.3,
                        max_output_tokens=get_model_token_limit(model_name or "gemini-1.5-flash", 500)  # use the configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        # Extract the JSON payload
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            except Exception:
                pass
        # If a model name is given (or the Gemini fallback above failed), use that model
        if model_name:
            model_name_lower = model_name.lower().strip()
            is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
            if is_gemini:
                gemini_model_name = model_name.strip()
                if gemini_model_name.lower().startswith('gemini:'):
                    gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name=gemini_model_name,
                        temperature=0.3,
                        max_output_tokens=get_model_token_limit(model_name or "gemini-1.5-flash", 500)  # use the configured token limit
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            else:
                # Call the Ollama API
                try:
                    # Use the configured input token limit as num_ctx
                    num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input')
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model_name,
                            'prompt': prompt,
                            'stream': False,
                            'options': {
                                'temperature': 0.3,
                                'num_predict': get_model_token_limit(model_name, 500),  # use the configured token limit
                                'num_ctx': num_ctx  # use the input token limit as the context window
                            }
                        },
                        timeout=120  # 2-minute timeout
                    )
                    if ollama_response.status_code == 200:
                        response_data = ollama_response.json()
                        response_text = response_data.get('response', '').strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
                except Exception:
                    pass
        # If AI extraction fails, return default values
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
    except Exception as e:
        print(f"[Metadata extraction] Error: {str(e)}")
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
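# Illustrative only: the commit message mentions checking the Ollama connection
# before API calls. Wired into a helper like extract_metadata_with_ai, that check
# could sit just before the requests.post call, e.g.:
#   ok, err = _check_ollama_connection()
#   if not ok:
#       print(f"[Metadata extraction] Ollama unreachable: {err}")
#   else:
#       ...  # proceed with the /api/generate request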
| def extract_chunk_metadata(chunk_content, full_content=None, chunk_index=None, file_id=None, model_name=None): | |
| """μ²ν¬μ λ©νλ°μ΄ν° μΆμΆ (νμ, λ±μ₯μΈλ¬Ό, μκ°μ λ°°κ²½, μΈλ¬Ό κ΄κ³) | |
| Args: | |
| chunk_content: λΆμν μ²ν¬ λ΄μ© | |
| full_content: μλ³Έ μΉμμ€ μ 체 λ΄μ© (μΈλ¬Ό κ΄κ³ νμ μ©) | |
| chunk_index: μ²ν¬ μΈλ±μ€ | |
| file_id: νμΌ ID | |
| model_name: μ¬μ©ν AI λͺ¨λΈλͺ | |
| """ | |
| metadata = { | |
| "pov": None, | |
| "characters": [], | |
| "time_background": None, | |
| "character_relationships": [] | |
| } | |
| # AIλ₯Ό μ¬μ©ν λ©νλ°μ΄ν° μΆμΆ (νμ, λ±μ₯μΈλ¬Ό, μκ°μ λ°°κ²½, μΈλ¬Ό κ΄κ³) | |
| # Parent Chunkκ° μμΌλ©΄ μ°Έμ‘° | |
| parent_chunk = None | |
| if file_id: | |
| try: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| except: | |
| pass | |
| # μλ³Έ μΉμμ€ μ 체 λ΄μ©μ μ°Έμ‘°νμ¬ λ©νλ°μ΄ν° μΆμΆ | |
| ai_metadata = extract_metadata_with_ai(chunk_content, full_content, parent_chunk, model_name) | |
| if ai_metadata: | |
| metadata["pov"] = ai_metadata.get("pov") | |
| metadata["characters"] = ai_metadata.get("characters", []) | |
| metadata["time_background"] = ai_metadata.get("time_background") | |
| metadata["character_relationships"] = ai_metadata.get("character_relationships", []) | |
| return metadata | |
| def analyze_episode(episode_content, episode_title, full_content=None, parent_chunk=None, model_name=None): | |
| """νμ°¨λ³ λΆμ (μ£Όμ μ€ν 리, λ±μ₯μΈλ¬Ό, μΈλ¬Ό κ΄κ³ λ³ν, κΈ°ν) | |
| Args: | |
| episode_content: λΆμν νμ°¨ λ΄μ© | |
| episode_title: νμ°¨ μ λͺ© (μ: '1ν', '2ν') | |
| full_content: μλ³Έ μΉμμ€ μ 체 λ΄μ© (μ°Έκ³ μ©) | |
| parent_chunk: Parent Chunk κ°μ²΄ (μ νμ¬ν) | |
| model_name: μ¬μ©ν AI λͺ¨λΈλͺ | |
| Returns: | |
| λΆμ κ²°κ³Ό ν μ€νΈ (νλμ ν μ€νΈλ‘ μ΄μ΄μ μ μ₯) | |
| """ | |
| try: | |
| # μλ³Έ μΉμμ€ μ 체 λ΄μ©μ μ°Έμ‘° | |
| full_content_preview = "" | |
| if full_content: | |
| # μ 체 λ΄μ©μ΄ λ무 κΈΈλ©΄ μλΆλΆκ³Ό λ·λΆλΆ μΌλΆλ§ μ¬μ© (μ΅λ 30000μ) | |
| if len(full_content) > 30000: | |
| full_content_preview = full_content[:15000] + "\n... (μ€κ° μλ΅) ...\n" + full_content[-15000:] | |
| else: | |
| full_content_preview = full_content | |
| # Parent Chunk μ 보 μΆκ° | |
| parent_info = "" | |
| if parent_chunk: | |
| parent_info = f""" | |
| μν μ 체 μ 보: | |
| - μΈκ³κ΄: {parent_chunk.world_view or 'μμ'} | |
| - μ£Όμ μΊλ¦ν°: {parent_chunk.characters or 'μμ'} | |
| - μ£Όμ μ€ν 리: {parent_chunk.story or 'μμ'} | |
| """ | |
| # ν둬ννΈ μμ± | |
| prompt = f"""λ€μ μΉμμ€μ {episode_title} νμ°¨λ₯Ό λΆμνμ¬ μλ νλͺ©λ€μ νλμ ν μ€νΈλ‘ μ΄μ΄μ μμ±ν΄μ£ΌμΈμ. | |
| {parent_info} | |
| μλ³Έ μΉμμ€ μ 체 λ΄μ© (μ°Έκ³ μ©): | |
| {full_content_preview[:50000] if full_content_preview else "μμ"} | |
| λΆμν νμ°¨ λ΄μ© ({episode_title}): | |
| {episode_content[:10000] if len(episode_content) > 10000 else episode_content} | |
| λ€μ νμμΌλ‘ λΆμ κ²°κ³Όλ₯Ό μμ±ν΄μ£ΌμΈμ (νλμ ν μ€νΈλ‘ μ΄μ΄μ μμ±): | |
| ## {episode_title} μ£Όμ μ€ν 리 λΆμ | |
| [μ΄ νμ°¨μμ μΌμ΄λ μ£Όμ μ¬κ±΄κ³Ό μ€ν 리 μ κ°λ₯Ό μμΈν λΆμν΄μ£ΌμΈμ] | |
| ## {episode_title} μ£Όμ λ±μ₯ μΈλ¬Ό λΆμ | |
| [μ΄ νμ°¨μ λ±μ₯ν μ£Όμ μΈλ¬Όλ€κ³Ό κ·Έλ€μ μν , νλ, νΉμ§μ λΆμν΄μ£ΌμΈμ] | |
| ## μΈλ¬Όκ³Ό μΈλ¬Όκ°μ κ΄κ³ λ³ν | |
| [μ΄ νμ°¨μμ μΈλ¬Όλ€ κ°μ κ΄κ³κ° μ΄λ»κ² λ³ννλμ§, μλ‘μ΄ κ΄κ³κ° νμ±λμλμ§ λ±μ λΆμν΄μ£ΌμΈμ] | |
| ## {episode_title} μΈλ¬Ό μΈλͺ¨ λΆμ | |
| [μ΄ νμ°¨μ λ±μ₯ν μΈλ¬Όλ€μ μΈλͺ¨, 체ν, μΌκ΅΄ νΉμ§, μ 체μ νΉμ§ λ±μ μμΈν λΆμν΄μ£ΌμΈμ. νΉν μλ‘ λ±μ₯ν μΈλ¬Όμ΄λ μΈλͺ¨κ° λ³κ²½λ μΈλ¬Όμ λν΄ μμΈν μ€λͺ ν΄μ£ΌμΈμ] | |
| ## {episode_title} μΈλ¬Ό μ볡 λΆμ | |
| [μ΄ νμ°¨μ λ±μ₯ν μΈλ¬Όλ€μ΄ μ°©μ©ν μ볡, 볡μ₯, μ‘μΈμ리 λ±μ μμΈν λΆμν΄μ£ΌμΈμ. μ볡μ μ€νμΌ, μμ, νΉμ§, μν©μ λ§λ 볡μ₯μΈμ§ λ±μ λΆμν΄μ£ΌμΈμ] | |
| ## {episode_title} λ°°κ²½ λΆμ | |
| [μ΄ νμ°¨μ λ°°κ²½μ΄ λλ μ₯μ, νκ²½, μκ°λ, λΆμκΈ° λ±μ μμΈν λΆμν΄μ£ΌμΈμ. μ₯μμ νΉμ§, λΆμκΈ°, μκ°μ λ°°κ²½, λ μ¨, κ³μ λ±μ ν¬ν¨νμ¬ λΆμν΄μ£ΌμΈμ] | |
| ## κΈ°ν | |
| [μ΄ νμ°¨μ νΉλ³ν μ , μ€μ μ¬κ±΄, λ‘λ°₯, 볡μ λ± κΈ°ν μ€μν λ΄μ©μ λΆμν΄μ£ΌμΈμ] | |
| μλ΅μ μ νμμ κ·Έλλ‘ μ μ§νλ©΄μ κ° νλͺ©μ λν μμΈν λΆμ λ΄μ©μ μμ±ν΄μ£ΌμΈμ.""" | |
| # λͺ¨λΈλͺ μ΄ μμΌλ©΄ κΈ°λ³Έκ° μ¬μ© (Gemini μ°μ μλ) | |
| if not model_name: | |
| # Gemini μλ | |
| try: | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| result = gemini_client.generate_response( | |
| prompt=prompt, | |
| model_name="gemini-1.5-flash", | |
| temperature=0.5, | |
| max_output_tokens=get_model_token_limit("gemini-1.5-flash", 3000) # μ μ₯λ ν ν° μ μ¬μ© | |
| ) | |
| if not result['error'] and result.get('response'): | |
| return result['response'].strip() | |
| except Exception as e: | |
| print(f"[νμ°¨ λΆμ] Gemini κΈ°λ³Έ λͺ¨λΈ μ€λ₯: {str(e)}") | |
| # λͺ¨λΈλͺ μ΄ μκ±°λ Gemini μ€ν¨ μ ν΄λΉ λͺ¨λΈ μ¬μ© | |
| if model_name: | |
| model_name_lower = model_name.lower().strip() | |
| is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-') | |
| if is_gemini: | |
| gemini_model_name = model_name.strip() | |
| if gemini_model_name.lower().startswith('gemini:'): | |
| gemini_model_name = gemini_model_name.split(':', 1)[1].strip() | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| result = gemini_client.generate_response( | |
| prompt=prompt, | |
| model_name=gemini_model_name, | |
| temperature=0.5, | |
| max_output_tokens=get_model_token_limit(model_name, 3000) # μ μ₯λ ν ν° μ μ¬μ© | |
| ) | |
| if not result['error'] and result.get('response'): | |
| return result['response'].strip() | |
| else: | |
| # Ollama API νΈμΆ | |
| try: | |
| # μ λ ₯ ν ν° μλ₯Ό num_ctxλ‘ μ¬μ© | |
| num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input') | |
| ollama_response = requests.post( | |
| f'{OLLAMA_BASE_URL}/api/generate', | |
| json={ | |
| 'model': model_name, | |
| 'prompt': prompt, | |
| 'stream': False, | |
| 'options': { | |
| 'temperature': 0.5, | |
| 'num_predict': get_model_token_limit(model_name, 3000), # μ μ₯λ ν ν° μ μ¬μ© | |
| 'num_ctx': num_ctx # μ λ ₯ ν ν° μλ₯Ό 컨ν μ€νΈ μλμ°λ‘ μ¬μ© | |
| } | |
| }, | |
| timeout=300 # 5λΆ νμμμ (νμ°¨ λΆμμ μκ°μ΄ μ€λ 걸릴 μ μμ) | |
| ) | |
| if ollama_response.status_code == 200: | |
| response_data = ollama_response.json() | |
| return response_data.get('response', '').strip() | |
| except requests.exceptions.Timeout: | |
| print(f"[νμ°¨ λΆμ] Ollama νμμμ: μμ² μκ°μ΄ μ΄κ³Όλμμ΅λλ€. (5λΆ)") | |
| print(f"[νμ°¨ λΆμ] νμ°¨ λ΄μ©μ΄ λ무 κΈΈκ±°λ λͺ¨λΈ μλ΅μ΄ λ릴 μ μμ΅λλ€.") | |
| except requests.exceptions.ConnectionError: | |
| print(f"[νμ°¨ λΆμ] Ollama μ°κ²° μ€λ₯: Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[νμ°¨ λΆμ] Ollama μ€λ₯: {str(e)}") | |
| # AI λΆμ μ€ν¨ μ κΈ°λ³Έκ° λ°ν | |
| return f"## {episode_title} λΆμ\nλΆμμ μλ£ν μ μμμ΅λλ€." | |
| except Exception as e: | |
| print(f"[νμ°¨ λΆμ] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return f"## {episode_title} λΆμ\nλΆμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}" | |
| def extract_graph_from_episode(episode_content, episode_title, file_id, full_content=None, parent_chunk=None, model_name=None): | |
| """νμ°¨λ³ Graph Extraction (μν°ν°μ κ΄κ³ μΆμΆ) | |
| Args: | |
| episode_content: λΆμν νμ°¨ λ΄μ© | |
| episode_title: νμ°¨ μ λͺ© (μ: '1ν', '2ν') | |
| file_id: νμΌ ID | |
| full_content: μλ³Έ μΉμμ€ μ 체 λ΄μ© (μ°Έκ³ μ©) | |
| parent_chunk: Parent Chunk κ°μ²΄ (μ νμ¬ν) | |
| model_name: μ¬μ©ν AI λͺ¨λΈλͺ | |
| Returns: | |
| μΆμΆ μ±κ³΅ μ¬λΆ (bool) | |
| """ | |
| try: | |
| print(f"[Graph Extraction] '{episode_title}' Graph Extraction μμ...") | |
| # Parent Chunk μ 보 μΆκ° | |
| parent_info = "" | |
| if parent_chunk: | |
| parent_info = f""" | |
| μν μ 체 μ 보: | |
| - μΈκ³κ΄: {parent_chunk.world_view or 'μμ'} | |
| - μ£Όμ μΊλ¦ν°: {parent_chunk.characters or 'μμ'} | |
| - μ£Όμ μ€ν 리: {parent_chunk.story or 'μμ'} | |
| """ | |
| # Graph Extraction ν둬ννΈ μμ± | |
| from app.prompts.graph_extraction import get_graph_extraction_prompt | |
| prompt = get_graph_extraction_prompt( | |
| episode_content=episode_content, | |
| episode_title=episode_title, | |
| full_content=full_content, | |
| parent_chunk_info=parent_info, | |
| max_length=10000 | |
| ) | |
| # λͺ¨λΈλͺ μ΄ μμΌλ©΄ κΈ°λ³Έκ° μ¬μ© (Gemini μ°μ μλ) | |
| response_text = None | |
| if not model_name: | |
| # Gemini μλ | |
| try: | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| result = gemini_client.generate_response( | |
| prompt=prompt, | |
| model_name="gemini-1.5-flash", | |
| temperature=0.3, | |
| max_output_tokens=3000 | |
| ) | |
| if not result['error'] and result.get('response'): | |
| response_text = result['response'].strip() | |
| except Exception as e: | |
| print(f"[Graph Extraction] Gemini κΈ°λ³Έ λͺ¨λΈ μ€λ₯: {str(e)}") | |
| # λͺ¨λΈλͺ μ΄ μκ±°λ Gemini μ€ν¨ μ ν΄λΉ λͺ¨λΈ μ¬μ© | |
| if not response_text and model_name: | |
| model_name_lower = model_name.lower().strip() | |
| is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-') | |
| if is_gemini: | |
| gemini_model_name = model_name.strip() | |
| if gemini_model_name.lower().startswith('gemini:'): | |
| gemini_model_name = gemini_model_name.split(':', 1)[1].strip() | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| result = gemini_client.generate_response( | |
| prompt=prompt, | |
| model_name=gemini_model_name, | |
| temperature=0.3, | |
| max_output_tokens=3000 | |
| ) | |
| if not result['error'] and result.get('response'): | |
| response_text = result['response'].strip() | |
| else: | |
| # Ollama API νΈμΆ | |
| try: | |
| # μ λ ₯ ν ν° μλ₯Ό num_ctxλ‘ μ¬μ© | |
| num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input') | |
| ollama_response = requests.post( | |
| f'{OLLAMA_BASE_URL}/api/generate', | |
| json={ | |
| 'model': model_name, | |
| 'prompt': prompt, | |
| 'stream': False, | |
| 'options': { | |
| 'temperature': 0.3, | |
| 'num_predict': 3000, | |
| 'num_ctx': num_ctx # μ λ ₯ ν ν° μλ₯Ό 컨ν μ€νΈ μλμ°λ‘ μ¬μ© | |
| } | |
| }, | |
| timeout=300 # 5λΆ νμμμ | |
| ) | |
| if ollama_response.status_code == 200: | |
| response_data = ollama_response.json() | |
| response_text = response_data.get('response', '').strip() | |
| except requests.exceptions.Timeout: | |
| print(f"[Graph Extraction] Ollama νμμμ: μμ² μκ°μ΄ μ΄κ³Όλμμ΅λλ€. (5λΆ)") | |
| except requests.exceptions.ConnectionError: | |
| print(f"[Graph Extraction] Ollama μ°κ²° μ€λ₯: Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[Graph Extraction] Ollama μ€λ₯: {str(e)}") | |
| if not response_text: | |
| print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ€ν¨: μλ΅ μμ") | |
| return False | |
| # JSON μΆμΆ | |
| json_match = re.search(r'\{.*\}', response_text, re.DOTALL) | |
| if not json_match: | |
| print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ€ν¨: JSON νμμ΄ μλλλ€") | |
| print(f"[Graph Extraction] μλ΅ μΌλΆ: {response_text[:500]}") | |
| return False | |
| try: | |
| graph_data = json.loads(json_match.group(0)) | |
| except json.JSONDecodeError as e: | |
| print(f"[Graph Extraction] '{episode_title}' JSON νμ± μ€λ₯: {str(e)}") | |
| print(f"[Graph Extraction] μλ΅ μΌλΆ: {response_text[:500]}") | |
| return False | |
| # κΈ°μ‘΄ Graph λ°μ΄ν° μμ (κ°μ νμ°¨μ κΈ°μ‘΄ λ°μ΄ν°) | |
| GraphEntity.query.filter_by(file_id=file_id, episode_title=episode_title).delete() | |
| GraphRelationship.query.filter_by(file_id=file_id, episode_title=episode_title).delete() | |
| GraphEvent.query.filter_by(file_id=file_id, episode_title=episode_title).delete() | |
| db.session.commit() | |
| # λ°μ΄ν°λ² μ΄μ€μ μ μ₯ | |
| saved_count = 0 | |
| # μν°ν° μ μ₯ | |
| entities = graph_data.get('entities', {}) | |
| # μΈλ¬Ό μ μ₯ | |
| characters = entities.get('characters', []) | |
| for char in characters: | |
| if char.get('name'): | |
| entity = GraphEntity( | |
| file_id=file_id, | |
| episode_title=episode_title, | |
| entity_name=char.get('name', ''), | |
| entity_type='character', | |
| description=char.get('description'), | |
| role=char.get('role'), | |
| category=None | |
| ) | |
| db.session.add(entity) | |
| saved_count += 1 | |
| # μ₯μ μ μ₯ | |
| locations = entities.get('locations', []) | |
| for loc in locations: | |
| if loc.get('name'): | |
| entity = GraphEntity( | |
| file_id=file_id, | |
| episode_title=episode_title, | |
| entity_name=loc.get('name', ''), | |
| entity_type='location', | |
| description=loc.get('description'), | |
| role=None, | |
| category=loc.get('category') | |
| ) | |
| db.session.add(entity) | |
| saved_count += 1 | |
| # κ΄κ³ μ μ₯ | |
| relationships = graph_data.get('relationships', []) | |
| for rel in relationships: | |
| if rel.get('source') and rel.get('target'): | |
| relationship = GraphRelationship( | |
| file_id=file_id, | |
| episode_title=episode_title, | |
| source=rel.get('source', ''), | |
| target=rel.get('target', ''), | |
| relationship_type=rel.get('type', ''), | |
| description=rel.get('description'), | |
| event=rel.get('event') | |
| ) | |
| db.session.add(relationship) | |
| saved_count += 1 | |
| # μ¬κ±΄ μ μ₯ | |
| events = graph_data.get('events', []) | |
| for event in events: | |
| if event.get('name') or event.get('description'): | |
| participants = event.get('participants', []) | |
| participants_json = json.dumps(participants, ensure_ascii=False) if participants else None | |
| graph_event = GraphEvent( | |
| file_id=file_id, | |
| episode_title=episode_title, | |
| event_name=event.get('name', ''), | |
| description=event.get('description', ''), | |
| participants=participants_json, | |
| location=event.get('location'), | |
| significance=event.get('significance') | |
| ) | |
| db.session.add(graph_event) | |
| saved_count += 1 | |
| db.session.commit() | |
| print(f"[Graph Extraction] '{episode_title}' Graph Extraction μλ£: {saved_count}κ° νλͺ© μ μ₯") | |
| return True | |
| except Exception as e: | |
| print(f"[Graph Extraction] '{episode_title}' Graph Extraction μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| db.session.rollback() | |
| return False | |
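# Illustrative only (not part of the original file): the graph_data shape the
# parsing/save logic above expects from the model, reconstructed from the keys
# it reads (entities.characters / entities.locations, relationships, events).
_EXAMPLE_GRAPH_DATA = {
    "entities": {
        "characters": [{"name": "...", "description": "...", "role": "..."}],
        "locations": [{"name": "...", "description": "...", "category": "..."}],
    },
    "relationships": [
        {"source": "...", "target": "...", "type": "...", "description": "...", "event": "..."}
    ],
    "events": [
        {"name": "...", "description": "...", "participants": ["..."], "location": "...", "significance": "..."}
    ],
}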
| def create_chunks_for_file(file_id, content, skip_episode_analysis=False, skip_graph_extraction=False): | |
| """νμΌ λ΄μ©μ μΉμ λ³λ‘ λΆν νμ¬ μλ―Έ κΈ°λ° μ²ν¬λ‘ μ μ₯ (λ²‘ν° DB ν¬ν¨) | |
| μΉμ λΆν κ·μΉ: | |
| - #μνμ€λͺ λΆν° #1νκΉμ§: 'μνμ€λͺ ' μΉμ , λ©νλ°μ΄ν°μ #μνμ€λͺ μΆκ° | |
| - #nνλΆν° #n+1νκΉμ§: 'nν' μΉμ , λ©νλ°μ΄ν°μ νμ°¨ μ 보(nν) μΆκ° | |
| Args: | |
| file_id: νμΌ ID | |
| content: νμΌ λ΄μ© | |
| skip_episode_analysis: νμ°¨ λΆμ 건λλ°κΈ° (κΈ°λ³Έκ°: False) | |
| skip_graph_extraction: Graph Extraction 건λλ°κΈ° (κΈ°λ³Έκ°: False) | |
| """ | |
| try: | |
| print(f"[μ²ν¬ μμ±] νμΌ ID {file_id}μ λν μ²ν¬ μμ± μμ") | |
| print(f"[μ²ν¬ μμ±] μλ³Έ ν μ€νΈ κΈΈμ΄: {len(content)}μ") | |
| # νμΌ μ 보 κ°μ Έμ€κΈ° (λͺ¨λΈλͺ λ±) | |
| uploaded_file = UploadedFile.query.get(file_id) | |
| model_name = uploaded_file.model_name if uploaded_file else None | |
| # λ²‘ν° DB λ§€λμ κ°μ Έμ€κΈ° | |
| vector_db = get_vector_db() | |
| # κΈ°μ‘΄ μ²ν¬ μμ (DB + λ²‘ν° DB) | |
| existing_chunks = DocumentChunk.query.filter_by(file_id=file_id).all() | |
| if existing_chunks: | |
| print(f"[μ²ν¬ μμ±] κΈ°μ‘΄ μ²ν¬ {len(existing_chunks)}κ° μμ μ€...") | |
| # λ²‘ν° DBμμ μμ | |
| vector_db.delete_chunks_by_file_id(file_id) | |
| # DBμμ μμ | |
| DocumentChunk.query.filter_by(file_id=file_id).delete() | |
| db.session.commit() | |
| # μλ³Έ μΉμμ€μ μΉμ λ³λ‘ λΆν (#μνμ€λͺ , #1ν, #2ν λ±) | |
| sections = split_content_by_episodes(content) | |
| print(f"[μ²ν¬ μμ±] μΉμ λΆν μλ£: {len(sections)}κ° μΉμ ") | |
| for i, (section_type, section_title, section_content, section_metadata) in enumerate(sections): | |
| print(f"[μ²ν¬ μμ±] μΉμ {i+1}: {section_title} ({len(section_content)}μ)") | |
| if len(sections) == 0: | |
| print(f"[μ²ν¬ μμ±] κ²½κ³ : μΉμ μ΄ μμ±λμ§ μμμ΅λλ€.") | |
| return 0 | |
| # κΈ°μ‘΄ νμ°¨ λΆμ μμ | |
| existing_analyses = EpisodeAnalysis.query.filter_by(file_id=file_id).all() | |
| if existing_analyses: | |
| print(f"[νμ°¨ λΆμ] κΈ°μ‘΄ νμ°¨ λΆμ {len(existing_analyses)}κ° μμ μ€...") | |
| for analysis in existing_analyses: | |
| db.session.delete(analysis) | |
| db.session.commit() | |
# Analyze each episode, excluding the '#작품설명' section
episode_sections = [s for s in sections if s[0] != '작품설명']  # only sections whose section_type is not '작품설명'
| if episode_sections and model_name and not skip_episode_analysis: | |
| print(f"[νμ°¨ λΆμ] {len(episode_sections)}κ° νμ°¨ λΆμ μμ...") | |
| # Parent Chunk κ°μ Έμ€κΈ° | |
| parent_chunk = None | |
| try: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| except: | |
| pass | |
| # κ° νμ°¨ λΆμ κ²°κ³Όλ₯Ό νλμ ν μ€νΈλ‘ μ΄μ΄μ μ μ₯ | |
| all_analyses = [] | |
| for section_type, section_title, section_content, section_metadata in episode_sections: | |
| try: | |
| print(f"[νμ°¨ λΆμ] '{section_title}' λΆμ μ€...") | |
| analysis_result = analyze_episode( | |
| episode_content=section_content, | |
| episode_title=section_title, | |
| full_content=content, | |
| parent_chunk=parent_chunk, | |
| model_name=model_name | |
| ) | |
| if analysis_result: | |
| all_analyses.append(f"\n\n{analysis_result}") | |
| print(f"[νμ°¨ λΆμ] '{section_title}' λΆμ μλ£") | |
| else: | |
| print(f"[νμ°¨ λΆμ] '{section_title}' λΆμ μ€ν¨ (κ²°κ³Ό μμ)") | |
| except Exception as e: | |
| print(f"[νμ°¨ λΆμ] '{section_title}' λΆμ μ€ μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| continue | |
| # λͺ¨λ νμ°¨ λΆμ κ²°κ³Όλ₯Ό νλμ ν μ€νΈλ‘ μ΄μ΄μ μ μ₯ | |
| if all_analyses: | |
| combined_analysis = "\n".join(all_analyses).strip() | |
| # νλμ ν΅ν© λΆμμΌλ‘ μ μ₯ (λλ μ μ μ₯νμ§ μκ³ νλμ μ΄μ΄μ μ μ₯) | |
| episode_analysis = EpisodeAnalysis( | |
| file_id=file_id, | |
| episode_title="μ 체 νμ°¨ ν΅ν© λΆμ", | |
| analysis_content=combined_analysis # λͺ¨λ νμ°¨ λΆμμ νλμ ν μ€νΈλ‘ μ μ₯ | |
| ) | |
| db.session.add(episode_analysis) | |
| db.session.commit() | |
| print(f"[νμ°¨ λΆμ] μλ£: {len(episode_sections)}κ° νμ°¨ λΆμ κ²°κ³Όλ₯Ό νλμ ν μ€νΈλ‘ μ μ₯") | |
| else: | |
| print(f"[νμ°¨ λΆμ] κ²½κ³ : λΆμ κ²°κ³Όκ° μμ΅λλ€.") | |
| # νμ°¨λ³ Graph Extraction μ€ν (νμ°¨ λΆμ μ±κ³΅ μ¬λΆμ κ΄κ³μμ΄ μ€ν) | |
| if episode_sections and model_name and not skip_graph_extraction: | |
| print(f"[Graph Extraction] νμ°¨λ³ Graph Extraction μμ...") | |
| # Parent Chunk κ°μ Έμ€κΈ° (νμ°¨ λΆμ λΈλ‘ λ°μμλ μ¬μ© κ°λ₯νλλ‘) | |
| parent_chunk = None | |
| try: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| except: | |
| pass | |
| graph_extraction_success_count = 0 | |
| for section_type, section_title, section_content, section_metadata in episode_sections: | |
| try: | |
| print(f"[Graph Extraction] '{section_title}' Graph Extraction μ€...") | |
| success = extract_graph_from_episode( | |
| episode_content=section_content, | |
| episode_title=section_title, | |
| file_id=file_id, | |
| full_content=content, | |
| parent_chunk=parent_chunk, | |
| model_name=model_name | |
| ) | |
| if success: | |
| graph_extraction_success_count += 1 | |
| print(f"[Graph Extraction] '{section_title}' Graph Extraction μλ£") | |
| else: | |
| print(f"[Graph Extraction] '{section_title}' Graph Extraction μ€ν¨") | |
| except Exception as e: | |
| print(f"[Graph Extraction] '{section_title}' Graph Extraction μ€ μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| continue | |
| print(f"[Graph Extraction] μλ£: {graph_extraction_success_count}/{len(episode_sections)}κ° νμ°¨ Graph Extraction μ±κ³΅") | |
| else: | |
| if not model_name: | |
| print(f"[νμ°¨ λΆμ] λͺ¨λΈλͺ μ΄ μμ΄ νμ°¨ λΆμμ 건λλλλ€.") | |
| elif not episode_sections: | |
| print(f"[νμ°¨ λΆμ] λΆμν νμ°¨κ° μμ΅λλ€.") | |
| # κ° μΉμ λ³λ‘ μ²ν¬ μμ± λ° μ μ₯ | |
| saved_count = 0 | |
| vector_saved_count = 0 | |
| global_chunk_index = 0 # μ 체 μ²ν¬ μΈλ±μ€ | |
| for section_idx, (section_type, section_title, section_content, section_metadata) in enumerate(sections): | |
| print(f"[μ²ν¬ μμ±] μΉμ '{section_title}' μ²λ¦¬ μ€... ({len(section_content)}μ)") | |
| # κ° μΉμ μ μλ―Έ κΈ°λ° μ²νΉ (λ¬Έμ₯κ³Ό λ¬Έλ¨ κ²½κ³λ₯Ό κ³ λ €νμ¬ λΆν ) | |
| # min_chunk_size: μ΅μ 200μ, max_chunk_size: μ΅λ 1000μ, overlap: 150μ | |
| section_chunks = split_text_into_chunks(section_content, min_chunk_size=200, max_chunk_size=1000, overlap=150) | |
| print(f"[μ²ν¬ μμ±] μΉμ '{section_title}' λΆν λ μ²ν¬ μ: {len(section_chunks)}κ°") | |
| # κ° μ²ν¬λ₯Ό λ°μ΄ν°λ² μ΄μ€μ λ²‘ν° DBμ μ μ₯ | |
| for chunk_idx, chunk_content in enumerate(section_chunks): | |
| try: | |
| # μΉμ λ©νλ°μ΄ν°λ₯Ό κΈ°λ³ΈμΌλ‘ μ¬μ© (chapter μ 보 ν¬ν¨) | |
| chunk_metadata = section_metadata.copy() | |
| # DBμ μ²ν¬ μ μ₯ (μΉμ λ©νλ°μ΄ν° ν¬ν¨) | |
| chunk = DocumentChunk( | |
| file_id=file_id, | |
| chunk_index=global_chunk_index, | |
| content=chunk_content, | |
| chunk_metadata=json.dumps(chunk_metadata, ensure_ascii=False) # μΉμ λ©νλ°μ΄ν° μ μ₯ | |
| ) | |
| db.session.add(chunk) | |
| db.session.flush() # ID μμ± | |
| # λ²‘ν° DBμ μ²ν¬ μΆκ° | |
| if vector_db.add_chunk( | |
| chunk_id=chunk.id, | |
| chunk_content=chunk_content, | |
| file_id=file_id, | |
| chunk_index=global_chunk_index | |
| ): | |
| vector_saved_count += 1 | |
| saved_count += 1 | |
| global_chunk_index += 1 | |
| # μ§ν μν© μΆλ ₯ (10κ°λ§λ€) | |
| if saved_count % 10 == 0: | |
| print(f"[μ²ν¬ μμ±] μ§ν μ€: {saved_count}κ° μ²ν¬ μ μ₯ μ€... (DB: {saved_count}, λ²‘ν° DB: {vector_saved_count})") | |
| except Exception as e: | |
| print(f"[μ²ν¬ μμ±] κ²½κ³ : μ²ν¬ {global_chunk_index} μ μ₯ μ€ μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| continue | |
| db.session.commit() | |
| print(f"[μ²ν¬ μμ±] μλ£: {saved_count}κ° μ²ν¬κ° λ°μ΄ν°λ² μ΄μ€μ μ μ₯λμμ΅λλ€. (λ²‘ν° DB: {vector_saved_count}κ°)") | |
| # μ μ₯ νμΈ | |
| verified_count = DocumentChunk.query.filter_by(file_id=file_id).count() | |
| if verified_count != saved_count: | |
| print(f"[μ²ν¬ μμ±] κ²½κ³ : μ μ₯λ μ²ν¬ μ({saved_count})μ νμΈλ μ²ν¬ μ({verified_count})κ° μΌμΉνμ§ μμ΅λλ€.") | |
| else: | |
| print(f"[μ²ν¬ μμ±] κ²μ¦ μλ£: {verified_count}κ° μ²ν¬κ° μ μμ μΌλ‘ μ μ₯λμμ΅λλ€.") | |
| return saved_count | |
| except Exception as e: | |
| db.session.rollback() | |
| print(f"[μ²ν¬ μμ±] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return 0 | |
| def create_parent_chunk_with_ai(file_id, content, model_name): | |
| """AIλ₯Ό μ¬μ©νμ¬ Parent Chunk μμ± (μΉμμ€ λΆμ)""" | |
| try: | |
| print(f"[Parent Chunk μμ±] νμΌ ID {file_id}μ λν Parent Chunk μμ± μμ") | |
| print(f"[Parent Chunk μμ±] μ¬μ© λͺ¨λΈ: {model_name}") | |
| print(f"[Parent Chunk μμ±] μλ³Έ ν μ€νΈ κΈΈμ΄: {len(content)}μ") | |
| # λͺ¨λΈλͺ μ΄ Noneμ΄κ±°λ λΉ λ¬Έμμ΄μΈ κ²½μ° μ²λ¦¬ | |
| if not model_name or not model_name.strip(): | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: λͺ¨λΈλͺ μ΄ μ 곡λμ§ μμμ΅λλ€.") | |
| return None | |
| # ν μ€νΈκ° λ무 κΈΈλ©΄ μΌλΆλ§ μ¬μ© (μ΅λ 50000μ) | |
| content_preview = content[:50000] if len(content) > 50000 else content | |
| if len(content) > 50000: | |
| print(f"[Parent Chunk μμ±] ν μ€νΈκ° κΈΈμ΄ μΌλΆλ§ μ¬μ©: {len(content_preview)}μ (μ 체: {len(content)}μ)") | |
| # λΆμ ν둬ννΈ μμ± | |
| analysis_prompt = f"""λ€μ μΉμμ€ ν μ€νΈλ₯Ό λΆμνμ¬ λ€μ νλͺ©λ€μ μμ±ν΄μ£ΌμΈμ. κ° νλͺ©μ λͺ ννκ³ κ΅¬μ²΄μ μΌλ‘ μμ±ν΄μ£ΌμΈμ. | |
| ν μ€νΈ λ΄μ©: | |
| {content_preview} | |
| μ ν μ€νΈλ₯Ό λΆμνμ¬ λ€μ νμμΌλ‘ λ΅λ³ν΄μ£ΌμΈμ: | |
| ## μΈκ³κ΄ μ€λͺ | |
| [μΈκ³κ΄μ λν μμΈν μ€λͺ μ μμ±νμΈμ. λ°°κ²½, μ€μ , κ·μΉ λ±μ ν¬ν¨νμΈμ.] | |
| ## μ£Όμ μΊλ¦ν° λΆμ | |
| [μ£Όμ λ±μ₯μΈλ¬Όλ€μ μ΄λ¦, μν , μ±κ²©, νΉμ§ λ±μ λΆμνμ¬ μμ±νμΈμ. κ° μΊλ¦ν°λ³λ‘ ꡬλΆνμ¬ μμ±νμΈμ.] | |
| ## μ£Όμ μ€ν 리 λΆμ | |
| [μ 체μ μΈ μ€ν 리 νλ¦, μ£Όμ μ¬κ±΄, κ°λ± ꡬ쑰 λ±μ λΆμνμ¬ μμ±νμΈμ.] | |
| ## μ£Όμ μνΌμλ λΆμ | |
| [μ€μν μνΌμλλ μ±ν°λ³ μ£Όμ λ΄μ©μ λΆμνμ¬ μμ±νμΈμ. μκ° μμλλ‘ μ 리νλ©΄ μ’μ΅λλ€.] | |
| ## κΈ°ν | |
| [μ μΉ΄ν κ³ λ¦¬μ ν¬ν¨λμ§ μμ§λ§ μ€μν μ 보λ νΉμ§ λ±μ μμ±νμΈμ.] | |
| κ° νλͺ©μ λͺ ννκ² κ΅¬λΆνμ¬ μμ±ν΄μ£ΌμΈμ.""" | |
| # λͺ¨λΈ νμ νμΈ (Gemini λλ Ollama) | |
| # Gemini λͺ¨λΈλͺ νμ: "gemini:λͺ¨λΈλͺ " λλ "gemini-1.5-flash" (μ λμ¬ μλ κ²½μ°λ μ§μ) | |
| model_name_lower = model_name.lower().strip() | |
| is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-') | |
| print(f"[Parent Chunk μμ±] λͺ¨λΈ νμ νμΈ: is_gemini={is_gemini}, model_name={model_name}") | |
| if is_gemini: | |
| # Gemini API νΈμΆ | |
| # λͺ¨λΈλͺ μμ "gemini:" μ λμ¬ μ κ±° (λμλ¬Έμ κ΅¬λΆ μμ΄) | |
| gemini_model_name = model_name.strip() | |
| if gemini_model_name.lower().startswith('gemini:'): | |
| gemini_model_name = gemini_model_name.split(':', 1)[1].strip() | |
| # "gemini-"λ‘ μμνλ κ²½μ° (μ: "gemini-1.5-flash") κ·Έλλ‘ μ¬μ© | |
| print(f"[Parent Chunk μμ±] Gemini APIμ λΆμ μμ² μ μ‘ μ€... (λͺ¨λΈ: {gemini_model_name})") | |
| print(f"[Parent Chunk μμ±] μλ³Έ λͺ¨λΈλͺ : {model_name} -> Gemini λͺ¨λΈλͺ : {gemini_model_name}") | |
| gemini_client = get_gemini_client() | |
| if not gemini_client.is_configured(): | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: Gemini API ν€κ° μ€μ λμ§ μμμ΅λλ€.") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: Gemini ν΄λΌμ΄μΈνΈ μν νμΈ μ€...") | |
| # API ν€ μν λ€μ νμΈ | |
| from app.gemini_client import get_gemini_api_key | |
| api_key = get_gemini_api_key() | |
| if api_key: | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: API ν€λ μ‘΄μ¬νμ§λ§ ν΄λΌμ΄μΈνΈκ° μ€μ λμ§ μμμ΅λλ€. (κΈΈμ΄: {len(api_key)})") | |
| else: | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: API ν€κ° λ°μ΄ν°λ² μ΄μ€μ μμ΅λλ€.") | |
| return None | |
| print(f"[Parent Chunk μμ±] Gemini API ν€ νμΈ μλ£. API νΈμΆ μμ...") | |
| result = gemini_client.generate_response( | |
| prompt=analysis_prompt, | |
| model_name=gemini_model_name, | |
| temperature=0.7, | |
| max_output_tokens=get_model_token_limit_by_type(model_name or "gemini-1.5-flash", 8192, 'parent_chunk') # Parent Chunk μ μ© ν ν° μ μ¬μ© | |
| ) | |
| if result['error']: | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: Gemini API νΈμΆ μ€ν¨ - {result['error']}") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: result κ°μ²΄ λ΄μ©: {result}") | |
| return None | |
| if not result.get('response'): | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: Gemini API μλ΅μ΄ λΉμ΄μμ΅λλ€.") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: result κ°μ²΄ λ΄μ©: {result}") | |
| return None | |
| analysis_result = result['response'] | |
| print(f"[Parent Chunk μμ±] Gemini API μλ΅ μμ μ±κ³΅: {len(analysis_result)}μ") | |
| else: | |
| # Ollama API νΈμΆ | |
| print(f"[Parent Chunk μμ±] Ollama APIμ λΆμ μμ² μ μ‘ μ€... (λͺ¨λΈ: {model_name})") | |
| try: | |
| # μ λ ₯ ν ν° μλ₯Ό num_ctxλ‘ μ¬μ© | |
| num_ctx = get_model_token_limit_by_type(model_name, 100000, 'input') | |
| ollama_response = requests.post( | |
| f'{OLLAMA_BASE_URL}/api/chat', | |
| json={ | |
| 'model': model_name, | |
| 'messages': [ | |
| { | |
| 'role': 'user', | |
| 'content': analysis_prompt | |
| } | |
| ], | |
| 'stream': False, | |
| 'options': { | |
| 'num_ctx': num_ctx # μ λ ₯ ν ν° μλ₯Ό 컨ν μ€νΈ μλμ°λ‘ μ¬μ© | |
| } | |
| }, | |
| timeout=300 # 5λΆ νμμμ | |
| ) | |
| if ollama_response.status_code != 200: | |
| error_detail = ollama_response.text if ollama_response.text else 'μμΈ μ 보 μμ' | |
| if ollama_response.status_code == 404: | |
| error_msg = f'Ollama API μ€λ₯ 404: λͺ¨λΈ "{model_name}"μ(λ₯Ό) μ°Ύμ μ μμ΅λλ€. λͺ¨λΈμ΄ Ollamaμ μ€μΉλμ΄ μλμ§ νμΈνμΈμ.' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: λ§μ½ Gemini λͺ¨λΈμ μ¬μ©νλ €λ©΄ λͺ¨λΈλͺ μ΄ 'gemini:' λλ 'gemini-'λ‘ μμν΄μΌ ν©λλ€.") | |
| else: | |
| error_msg = f'Ollama API μ€λ₯: {ollama_response.status_code} - {error_detail[:200]}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| return None | |
| response_data = ollama_response.json() | |
| analysis_result = response_data.get('message', {}).get('content', '') | |
| print(f"[Parent Chunk μμ±] Ollama API μλ΅ μμ μ±κ³΅: {len(analysis_result)}μ") | |
| except requests.exceptions.Timeout: | |
| print(f"[Parent Chunk μμ±] β Ollama νμμμ: μμ² μκ°μ΄ μ΄κ³Όλμμ΅λλ€. (5λΆ)") | |
| print(f"[Parent Chunk μμ±] νμΌμ΄ λ무 ν¬κ±°λ λͺ¨λΈ μλ΅μ΄ λ릴 μ μμ΅λλ€.") | |
| return None | |
| except requests.exceptions.ConnectionError: | |
| print(f"[Parent Chunk μμ±] β Ollama μ°κ²° μ€λ₯: Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€.") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: Ollama URL: {OLLAMA_BASE_URL}") | |
| return None | |
| except requests.exceptions.RequestException as e: | |
| print(f"[Parent Chunk μμ±] β Ollama API μ€λ₯: {str(e)}") | |
| print(f"[Parent Chunk μμ±] λλ²κ·Έ: Ollama URL: {OLLAMA_BASE_URL}") | |
| return None | |
| if not analysis_result: | |
| print(f"[Parent Chunk μμ±] β οΈ κ²½κ³ : λΆμ κ²°κ³Όκ° λΉμ΄μμ΅λλ€.") | |
| return None | |
| print(f"[Parent Chunk μμ±] λΆμ κ²°κ³Ό μμ μλ£: {len(analysis_result)}μ") | |
| # λΆμ κ²°κ³Ό νμ± | |
| world_view = "" | |
| characters = "" | |
| story = "" | |
| episodes = "" | |
| others = "" | |
| # κ° μΉμ μΆμΆ | |
| sections = { | |
| 'world_view': ['## μΈκ³κ΄ μ€λͺ ', '## μΈκ³κ΄', 'μΈκ³κ΄ μ€λͺ '], | |
| 'characters': ['## μ£Όμ μΊλ¦ν° λΆμ', '## μ£Όμ μΊλ¦ν°', 'μ£Όμ μΊλ¦ν° λΆμ', '## μΊλ¦ν°'], | |
| 'story': ['## μ£Όμ μ€ν 리 λΆμ', '## μ£Όμ μ€ν 리', 'μ£Όμ μ€ν 리 λΆμ', '## μ€ν 리'], | |
| 'episodes': ['## μ£Όμ μνΌμλ λΆμ', '## μ£Όμ μνΌμλ', 'μ£Όμ μνΌμλ λΆμ', '## μνΌμλ'], | |
| 'others': ['## κΈ°ν', 'κΈ°ν'] | |
| } | |
| lines = analysis_result.split('\n') | |
| current_section = None | |
| current_content = [] | |
| for line in lines: | |
| line_stripped = line.strip() | |
| # μΉμ ν€λ νμΈ | |
| section_found = False | |
| for section_key, section_headers in sections.items(): | |
| for header in section_headers: | |
| if header in line_stripped: | |
| # μ΄μ μΉμ μ μ₯ | |
| if current_section: | |
| if current_section == 'world_view': | |
| world_view = '\n'.join(current_content).strip() | |
| elif current_section == 'characters': | |
| characters = '\n'.join(current_content).strip() | |
| elif current_section == 'story': | |
| story = '\n'.join(current_content).strip() | |
| elif current_section == 'episodes': | |
| episodes = '\n'.join(current_content).strip() | |
| elif current_section == 'others': | |
| others = '\n'.join(current_content).strip() | |
| current_section = section_key | |
| current_content = [] | |
| section_found = True | |
| break | |
| if section_found: | |
| break | |
| if not section_found and current_section: | |
| # νμ¬ μΉμ μ λ΄μ© μΆκ° | |
| if line_stripped and not line_stripped.startswith('#'): | |
| current_content.append(line) | |
| # λ§μ§λ§ μΉμ μ μ₯ | |
| if current_section: | |
| if current_section == 'world_view': | |
| world_view = '\n'.join(current_content).strip() | |
| elif current_section == 'characters': | |
| characters = '\n'.join(current_content).strip() | |
| elif current_section == 'story': | |
| story = '\n'.join(current_content).strip() | |
| elif current_section == 'episodes': | |
| episodes = '\n'.join(current_content).strip() | |
| elif current_section == 'others': | |
| others = '\n'.join(current_content).strip() | |
| # νμ± μ€ν¨ μ μ 체 λ΄μ©μ "κΈ°ν"μ μ μ₯ | |
| if not world_view and not characters and not story and not episodes: | |
| print(f"[Parent Chunk μμ±] κ²½κ³ : μΉμ νμ± μ€ν¨. μ 체 λ΄μ©μ 'κΈ°ν'μ μ μ₯ν©λλ€.") | |
| others = analysis_result.strip() | |
| # κΈ°μ‘΄ Parent Chunk μμ (μμΌλ©΄) | |
| existing_parent = ParentChunk.query.filter_by(file_id=file_id).first() | |
| if existing_parent: | |
| db.session.delete(existing_parent) | |
| db.session.commit() | |
| print(f"[Parent Chunk μμ±] κΈ°μ‘΄ Parent Chunk μμ μλ£") | |
| # Parent Chunk μμ± λ° μ μ₯ | |
| parent_chunk = ParentChunk( | |
| file_id=file_id, | |
| world_view=world_view if world_view else None, | |
| characters=characters if characters else None, | |
| story=story if story else None, | |
| episodes=episodes if episodes else None, | |
| others=others if others else None | |
| ) | |
| db.session.add(parent_chunk) | |
| db.session.commit() | |
| print(f"[Parent Chunk μμ±] β μλ£: Parent Chunkκ° μμ±λμμ΅λλ€.") | |
| print(f"[Parent Chunk μμ±] - μΈκ³κ΄: {len(world_view)}μ") | |
| print(f"[Parent Chunk μμ±] - μΊλ¦ν°: {len(characters)}μ") | |
| print(f"[Parent Chunk μμ±] - μ€ν 리: {len(story)}μ") | |
| print(f"[Parent Chunk μμ±] - μνΌμλ: {len(episodes)}μ") | |
| print(f"[Parent Chunk μμ±] - κΈ°ν: {len(others)}μ") | |
| return parent_chunk | |
| except requests.exceptions.RequestException as e: | |
| error_msg = f'Ollama API μ°κ²° μ€λ₯: {str(e)}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| import traceback | |
| traceback.print_exc() | |
| return None | |
| except Exception as e: | |
| db.session.rollback() | |
| error_msg = f'Parent Chunk μμ± μ€ μ€λ₯: {str(e)}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| import traceback | |
| traceback.print_exc() | |
| return None | |
| def get_parent_chunks_for_files(file_ids): | |
| """νμΌ ID λͺ©λ‘μ λν Parent Chunk μ‘°ν (λ¬Έλ§₯ νμ μ©)""" | |
| try: | |
| if not file_ids: | |
| return [] | |
| parent_chunks = [] | |
| for file_id in file_ids: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| if parent_chunk: | |
| parent_chunks.append(parent_chunk) | |
| return parent_chunks | |
| except Exception as e: | |
| print(f"[Parent Chunk μ‘°ν] μ€λ₯: {str(e)}") | |
| return [] | |
| def get_episode_analyses_for_files(file_ids): | |
| """νμΌ ID λͺ©λ‘μ λν νμ°¨λ³ λΆμ(EpisodeAnalysis) μ‘°ν (νμ°¨λ³ μμ½ μ°Έμ‘°μ©)""" | |
| try: | |
| if not file_ids: | |
| return [] | |
| episode_analyses = [] | |
| for file_id in file_ids: | |
| episode_analysis = EpisodeAnalysis.query.filter_by(file_id=file_id).first() | |
| if episode_analysis: | |
| episode_analyses.append(episode_analysis) | |
| return episode_analyses | |
| except Exception as e: | |
| print(f"[νμ°¨λ³ λΆμ μ‘°ν] μ€λ₯: {str(e)}") | |
| return [] | |
| def get_relevant_graph_data(query, file_ids=None): | |
| """μ§λ¬Έκ³Ό κ΄λ ¨λ GraphRAG λ°μ΄ν° μ‘°ν (μν°ν°, κ΄κ³, μ¬κ±΄) | |
| Args: | |
| query: μ¬μ©μ μ§λ¬Έ | |
| file_ids: νμΌ ID λͺ©λ‘ (Noneμ΄λ©΄ λͺ¨λ νμΌ) | |
| Returns: | |
| dict: { | |
| 'entities': [...], | |
| 'relationships': [...], | |
| 'events': [...], | |
| 'episodes': [...] | |
| } | |
| """ | |
| try: | |
| if not file_ids: | |
| return { | |
| 'entities': [], | |
| 'relationships': [], | |
| 'events': [], | |
| 'episodes': [] | |
| } | |
| # μ§λ¬Έμμ ν€μλ μΆμΆ (νκΈ λ¨μ΄, μλ¬Έ λ¨μ΄) | |
| query_words = set(re.findall(r'[κ°-ν£]+|\w+', query.lower())) | |
| # νμΌ ID νμ₯ (μ΄μ΄μ μ λ‘λλ νμΌ ν¬ν¨) | |
| expanded_file_ids = list(file_ids) | |
| for file_id in file_ids: | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| # μν°ν° κ²μ (μΈλ¬Ό, μ₯μ μ΄λ¦μ΄ μ§λ¬Έμ ν¬ν¨λ κ²½μ°) | |
| entities = [] | |
| if query_words: | |
| # μν°ν° μ΄λ¦μ μ§λ¬Έμ ν€μλκ° ν¬ν¨λ κ²½μ° | |
| entity_query = GraphEntity.query.filter( | |
| GraphEntity.file_id.in_(expanded_file_ids) | |
| ) | |
| # ν€μλ λ§€μΉ (μν°ν° μ΄λ¦μ΄λ μ€λͺ μ ν¬ν¨) | |
| matching_entities = [] | |
| for entity in entity_query.all(): | |
| entity_name_lower = entity.entity_name.lower() | |
| entity_desc_lower = (entity.description or '').lower() | |
| # μν°ν° μ΄λ¦μ΄λ μ€λͺ μ μ§λ¬Έ ν€μλκ° ν¬ν¨λμ΄ μλμ§ νμΈ | |
| if any(word in entity_name_lower or word in entity_desc_lower for word in query_words if len(word) > 1): | |
| matching_entities.append(entity) | |
| entities = matching_entities[:20] # μ΅λ 20κ° | |
| # κ΄κ³ κ²μ (κ΄κ³μ 주체λ λμμ΄ μ§λ¬Έμ ν¬ν¨λ κ²½μ°) | |
| relationships = [] | |
| if query_words: | |
| relationship_query = GraphRelationship.query.filter( | |
| GraphRelationship.file_id.in_(expanded_file_ids) | |
| ) | |
| matching_relationships = [] | |
| for rel in relationship_query.all(): | |
| source_lower = rel.source.lower() | |
| target_lower = rel.target.lower() | |
| rel_type_lower = rel.relationship_type.lower() | |
| rel_desc_lower = (rel.description or '').lower() | |
| # κ΄κ³μ 주체, λμ, μ ν, μ€λͺ μ μ§λ¬Έ ν€μλκ° ν¬ν¨λμ΄ μλμ§ νμΈ | |
| if any(word in source_lower or word in target_lower or word in rel_type_lower or word in rel_desc_lower | |
| for word in query_words if len(word) > 1): | |
| matching_relationships.append(rel) | |
| relationships = matching_relationships[:20] # μ΅λ 20κ° | |
| # μ¬κ±΄ κ²μ (μ¬κ±΄ μ΄λ¦μ΄λ μ€λͺ μ μ§λ¬Έ ν€μλκ° ν¬ν¨λ κ²½μ°) | |
| events = [] | |
| if query_words: | |
| event_query = GraphEvent.query.filter( | |
| GraphEvent.file_id.in_(expanded_file_ids) | |
| ) | |
| matching_events = [] | |
| for event in event_query.all(): | |
| event_name_lower = (event.event_name or '').lower() | |
| event_desc_lower = (event.description or '').lower() | |
| event_location_lower = (event.location or '').lower() | |
| # μ¬κ±΄ μ΄λ¦, μ€λͺ , μ₯μμ μ§λ¬Έ ν€μλκ° ν¬ν¨λμ΄ μλμ§ νμΈ | |
| if any(word in event_name_lower or word in event_desc_lower or word in event_location_lower | |
| for word in query_words if len(word) > 1): | |
| matching_events.append(event) | |
| events = matching_events[:20] # μ΅λ 20κ° | |
| # κ΄λ ¨ νμ°¨ μΆμΆ | |
| episodes = set() | |
| for entity in entities: | |
| episodes.add(entity.episode_title) | |
| for rel in relationships: | |
| episodes.add(rel.episode_title) | |
| for event in events: | |
| episodes.add(event.episode_title) | |
| print(f"[GraphRAG κ²μ] κ΄λ ¨ λ°μ΄ν° λ°κ²¬: μν°ν° {len(entities)}κ°, κ΄κ³ {len(relationships)}κ°, μ¬κ±΄ {len(events)}κ°, νμ°¨ {len(episodes)}κ°") | |
| return { | |
| 'entities': [e.to_dict() for e in entities], | |
| 'relationships': [r.to_dict() for r in relationships], | |
| 'events': [ev.to_dict() for ev in events], | |
| 'episodes': sorted(list(episodes)) | |
| } | |
| except Exception as e: | |
| print(f"[GraphRAG κ²μ] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return { | |
| 'entities': [], | |
| 'relationships': [], | |
| 'events': [], | |
| 'episodes': [] | |
| } | |
| def search_relevant_chunks(query, file_ids=None, model_name=None, top_k=5, min_score=1): | |
| """ | |
| μ§λ¬Έκ³Ό κ΄λ ¨λ μ²ν¬ κ²μ (λ²‘ν° κ²μ + Re-ranking) | |
| 1. λ²‘ν° κ²μμΌλ‘ μ΄κΈ° 30κ° λ¬Έμ κ²μ | |
| 2. Cross-Encoderλ‘ λ¦¬λνΉ | |
| 3. μμ top_kκ° λ°ν (κΈ°λ³Έ 5κ°) | |
| """ | |
| try: | |
| # λ²‘ν° DB λ§€λμ κ°μ Έμ€κΈ° | |
| vector_db = get_vector_db() | |
| # νμΌ ID νμ₯ (μ΄μ΄μ μ λ‘λλ νμΌ ν¬ν¨) | |
| expanded_file_ids = None | |
| if file_ids: | |
| expanded_file_ids = list(file_ids) | |
| for file_id in file_ids: | |
| # μλ³Έ νμΌμΈ κ²½μ° μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| # μλ³Έ νμΌμ΄ μ νλ κ²½μ°, μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all() | |
| for parent_file in parent_files: | |
| child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| # λͺ¨λΈ νν°λ§μ΄ νμν κ²½μ° νμΌ ID νν°λ§ | |
| if model_name and expanded_file_ids: | |
| filtered_files = UploadedFile.query.filter( | |
| UploadedFile.id.in_(expanded_file_ids), | |
| UploadedFile.model_name == model_name | |
| ).all() | |
| expanded_file_ids = [f.id for f in filtered_files] | |
| elif model_name and not expanded_file_ids: | |
| # νμΌ IDκ° μμΌλ©΄ λͺ¨λΈ μ΄λ¦μΌλ‘λ§ νν°λ§ | |
| filtered_files = UploadedFile.query.filter_by(model_name=model_name).all() | |
| expanded_file_ids = [f.id for f in filtered_files] | |
| # 1λ¨κ³: λ²‘ν° κ²μμΌλ‘ μ΄κΈ° 30κ° λ¬Έμ κ²μ | |
| print(f"[λ²‘ν° κ²μ] 쿼리: {query[:50]}..., νμΌ ID: {expanded_file_ids if expanded_file_ids else 'λͺ¨λ νμΌ'}") | |
| vector_results = vector_db.search_chunks( | |
| query=query, | |
| file_ids=expanded_file_ids, | |
| top_k=30 | |
| ) | |
| if not vector_results: | |
| print(f"[λ²‘ν° κ²μ] κ²°κ³Ό μμ, ν€μλ κΈ°λ° κ²μμΌλ‘ λ체") | |
| # λ²‘ν° κ²μ κ²°κ³Όκ° μμΌλ©΄ κΈ°μ‘΄ ν€μλ κΈ°λ° κ²μμΌλ‘ λ체 | |
| return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score) | |
| # 2λ¨κ³: Cross-Encoderλ‘ λ¦¬λνΉ | |
| print(f"[리λνΉ] {len(vector_results)}κ° μ²ν¬μ λν 리λνΉ μμ...") | |
| reranked_chunks = vector_db.rerank_chunks( | |
| query=query, | |
| chunks=vector_results, | |
| top_k=top_k | |
| ) | |
| # 3λ¨κ³: DBμμ μ²ν¬ κ°μ²΄ κ°μ Έμ€κΈ° | |
| final_chunks = [] | |
| for reranked in reranked_chunks: | |
| chunk_id = reranked['chunk_id'] | |
| chunk = DocumentChunk.query.get(chunk_id) | |
| if chunk: | |
| final_chunks.append(chunk) | |
| print(f"[λ²‘ν° κ²μ + 리λνΉ] μ΅μ’ {len(final_chunks)}κ° μ²ν¬ λ°ν") | |
| return final_chunks | |
| except Exception as e: | |
| print(f"[λ²‘ν° κ²μ] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| # μ€λ₯ μ κΈ°μ‘΄ ν€μλ κΈ°λ° κ²μμΌλ‘ λ체 | |
| print(f"[λ²‘ν° κ²μ] ν€μλ κΈ°λ° κ²μμΌλ‘ λ체") | |
| return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score) | |
| def search_relevant_chunks_fallback(query, file_ids=None, model_name=None, top_k=25, min_score=1): | |
| """κΈ°μ‘΄ ν€μλ κΈ°λ° κ²μ (Fallback)""" | |
| try: | |
| # κ²μ 쿼리 μ€λΉ - νκΈκ³Ό μλ¬Έ λ¨μ΄ λͺ¨λ μΆμΆ | |
| query_words = set(re.findall(r'[κ°-ν£]+|\w+', query.lower())) | |
| if not query_words: | |
| return [] | |
| # μ²ν¬ μ‘°ν | |
| query_obj = DocumentChunk.query.join(UploadedFile) | |
| if file_ids: | |
| # μ νλ νμΌ IDμ κ·Έ νμΌμ μ΄μ΄μ μ λ‘λλ λͺ¨λ νμΌ ID ν¬ν¨ | |
| expanded_file_ids = list(file_ids) | |
| for file_id in file_ids: | |
| # μλ³Έ νμΌμΈ κ²½μ° μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| # μλ³Έ νμΌμ΄ μ νλ κ²½μ°, μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all() | |
| for parent_file in parent_files: | |
| child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| query_obj = query_obj.filter(UploadedFile.id.in_(expanded_file_ids)) | |
| if model_name: | |
| query_obj = query_obj.filter(UploadedFile.model_name == model_name) | |
| all_chunks = query_obj.all() | |
| if not all_chunks: | |
| return [] | |
| # κ° μ²ν¬μ κ΄λ ¨λ μ μ κ³μ° (κ°μ λ μκ³ λ¦¬μ¦) | |
| scored_chunks = [] | |
| for chunk in all_chunks: | |
| chunk_content_lower = chunk.content.lower() | |
| chunk_words = set(re.findall(r'[κ°-ν£]+|\w+', chunk_content_lower)) | |
| # 1. κ³΅ν΅ λ¨μ΄ μ (κΈ°λ³Έ μ μ) | |
| common_words = query_words & chunk_words | |
| base_score = len(common_words) | |
| # 2. 쿼리 λ¨μ΄μ λΉλ κ°μ€μΉ (μ€μν λ¨μ΄κ° λ λ§μ΄ λνλ μλ‘ λμ μ μ) | |
| frequency_score = 0 | |
| for word in query_words: | |
| frequency_score += chunk_content_lower.count(word) | |
| # 3. 쿼리 λ¨μ΄ λΉμ¨ (μ²ν¬μμ 쿼리 λ¨μ΄κ° μ°¨μ§νλ λΉμ¨) | |
| if len(chunk_words) > 0: | |
| ratio_score = len(common_words) / len(chunk_words) * 10 | |
| else: | |
| ratio_score = 0 | |
| # μ΅μ’ μ μ κ³μ° (κ°μ€μΉ μ μ©) | |
| final_score = base_score * 2 + frequency_score * 0.5 + ratio_score | |
| # μ΅μ μ μ μ΄μμΈ μ²ν¬λ§ ν¬ν¨ | |
| if final_score >= min_score: | |
| scored_chunks.append((final_score, chunk)) | |
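| # μ μ κ³μ° μ (κ°μ μμ): 쿼리 λ¨μ΄ 2κ° μ€ 1κ°κ° μ²ν¬μ 3λ² λ±μ₯νκ³  μ²ν¬ κ³ μ  λ¨μ΄κ° 50κ°μΈ κ²½μ° | |
| #   base_score=1, frequency_score=3, ratio_score=1/50*10=0.2 -> final_score=1*2 + 3*0.5 + 0.2 = 3.7 | |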
| # μ μ μμΌλ‘ μ λ ¬νκ³ μμ kκ° μ ν | |
| scored_chunks.sort(key=lambda x: x[0], reverse=True) | |
| # top_kκ° μ ν | |
| top_chunks = [chunk for score, chunk in scored_chunks[:top_k]] | |
| return top_chunks | |
| except Exception as e: | |
| print(f"[ν€μλ κ²μ] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return [] | |
| def login(): | |
| """λ‘κ·ΈμΈ νμ΄μ§""" | |
| if current_user.is_authenticated: | |
| # κ΄λ¦¬μμΈ κ²½μ° κ΄λ¦¬μ νμ΄μ§λ‘ 리λ€μ΄λ νΈ | |
| if current_user.is_admin: | |
| return redirect(url_for('main.admin')) | |
| return redirect(url_for('main.index')) | |
| if request.method == 'POST': | |
| username = request.form.get('username', '').strip() | |
| password = request.form.get('password', '') | |
| if not username or not password: | |
| flash('μ¬μ©μλͺ κ³Ό λΉλ°λ²νΈλ₯Ό μ λ ₯ν΄μ£ΌμΈμ.', 'error') | |
| return render_template('login.html') | |
| user = User.query.filter_by(username=username).first() | |
| if user and user.check_password(password) and user.is_active: | |
| login_user(user) | |
| user.last_login = datetime.utcnow() | |
| db.session.commit() | |
| next_page = request.args.get('next') | |
| # κ΄λ¦¬μμΈ κ²½μ° κ΄λ¦¬μ νμ΄μ§λ‘ 리λ€μ΄λ νΈ | |
| if user.is_admin: | |
| return redirect(next_page) if next_page else redirect(url_for('main.admin')) | |
| return redirect(next_page) if next_page else redirect(url_for('main.index')) | |
| else: | |
| flash('μ¬μ©μλͺ λλ λΉλ°λ²νΈκ° μ¬λ°λ₯΄μ§ μμ΅λλ€.', 'error') | |
| return render_template('login.html') | |
| def logout(): | |
| """λ‘κ·Έμμ""" | |
| logout_user() | |
| flash('λ‘κ·Έμμλμμ΅λλ€.', 'info') | |
| return redirect(url_for('main.login')) | |
| def index(): | |
| return render_template('index.html') | |
| def webnovels(): | |
| """μ λ‘λλ μΉμμ€ λͺ©λ‘ νμ΄μ§""" | |
| return render_template('webnovels.html') | |
| def admin(): | |
| """κ΄λ¦¬μ νμ΄μ§""" | |
| users = User.query.order_by(User.created_at.desc()).all() | |
| return render_template('admin.html', users=users) | |
| def admin_messages(): | |
| """κ΄λ¦¬μ λ©μμ§ νμΈ νμ΄μ§""" | |
| return render_template('admin_messages.html') | |
| def admin_webnovels(): | |
| """μΉμμ€ κ΄λ¦¬ νμ΄μ§""" | |
| return render_template('admin_webnovels.html') | |
| def admin_prompts(): | |
| """ν둬ννΈ κ΄λ¦¬ νμ΄μ§""" | |
| return render_template('admin_prompts.html') | |
| def admin_settings(): | |
| """AI μ€μ κ΄λ¦¬ νμ΄μ§ (API ν€, ν ν° μ)""" | |
| return render_template('admin_settings.html') | |
| def admin_files(): | |
| """νμΌ λͺ©λ‘ κ΄λ¦¬ νμ΄μ§""" | |
| return render_template('admin_files.html') | |
| def get_users(): | |
| """μ¬μ©μ λͺ©λ‘ API""" | |
| try: | |
| users = User.query.order_by(User.created_at.desc()).all() | |
| return jsonify({ | |
| 'users': [user.to_dict() for user in users] | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'μ¬μ©μ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def create_user(): | |
| """μ¬μ©μ μμ± API""" | |
| try: | |
| data = request.json | |
| username = data.get('username', '').strip() | |
| nickname = data.get('nickname', '').strip() | |
| password = data.get('password', '') | |
| is_admin = data.get('is_admin', False) | |
| if not username or not password: | |
| return jsonify({'error': 'μ¬μ©μλͺ κ³Ό λΉλ°λ²νΈλ₯Ό μ λ ₯ν΄μ£ΌμΈμ.'}), 400 | |
| if User.query.filter_by(username=username).first(): | |
| return jsonify({'error': 'μ΄λ―Έ μ‘΄μ¬νλ μ¬μ©μλͺ μ λλ€.'}), 400 | |
| user = User(username=username, nickname=nickname if nickname else None, is_admin=is_admin, is_active=True) | |
| user.set_password(password) | |
| db.session.add(user) | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'μ¬μ©μκ° μ±κ³΅μ μΌλ‘ μμ±λμμ΅λλ€.', | |
| 'user': user.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'μ¬μ©μ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def update_user(user_id): | |
| """μ¬μ©μ μ 보 μμ API""" | |
| try: | |
| user = User.query.get_or_404(user_id) | |
| data = request.json | |
| # μκΈ° μμ μ κ΄λ¦¬μ κΆνμ μ κ±°νλ κ²μ λ°©μ§ | |
| if user_id == current_user.id and data.get('is_admin') is False: | |
| return jsonify({'error': 'μκΈ° μμ μ κ΄λ¦¬μ κΆνμ μ κ±°ν μ μμ΅λλ€.'}), 400 | |
| if 'username' in data: | |
| new_username = data['username'].strip() | |
| if new_username != user.username: | |
| if User.query.filter_by(username=new_username).first(): | |
| return jsonify({'error': 'μ΄λ―Έ μ‘΄μ¬νλ μ¬μ©μλͺ μ λλ€.'}), 400 | |
| user.username = new_username | |
| if 'nickname' in data: | |
| user.nickname = data['nickname'].strip() if data['nickname'] else None | |
| if 'password' in data and data['password']: | |
| user.set_password(data['password']) | |
| if 'is_admin' in data: | |
| user.is_admin = data['is_admin'] | |
| if 'is_active' in data: | |
| user.is_active = data['is_active'] | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'μ¬μ©μ μ λ³΄κ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.', | |
| 'user': user.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'μ¬μ©μ μ 보 μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_all_messages(): | |
| """μ 체 λ©μμ§ μ‘°ν (κ΄λ¦¬μμ©)""" | |
| try: | |
| user_id = request.args.get('user_id', type=int) | |
| session_id = request.args.get('session_id', type=int) | |
| message_id = request.args.get('message_id', type=int) | |
| page = request.args.get('page', 1, type=int) | |
| per_page = request.args.get('per_page', 50, type=int) | |
| query = ChatMessage.query.join(ChatSession) | |
| if user_id: | |
| query = query.filter(ChatSession.user_id == user_id) | |
| if session_id: | |
| query = query.filter(ChatMessage.session_id == session_id) | |
| if message_id: | |
| query = query.filter(ChatMessage.id == message_id) | |
| messages = query.order_by(ChatMessage.created_at.desc())\ | |
| .paginate(page=page, per_page=per_page, error_out=False) | |
| return jsonify({ | |
| 'messages': [msg.to_dict() for msg in messages.items], | |
| 'total': messages.total, | |
| 'pages': messages.pages, | |
| 'current_page': page | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λ©μμ§ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_all_sessions(): | |
| """μ 체 λν μΈμ μ‘°ν (κ΄λ¦¬μμ©)""" | |
| try: | |
| user_id = request.args.get('user_id', type=int) | |
| page = request.args.get('page', 1, type=int) | |
| per_page = request.args.get('per_page', 50, type=int) | |
| query = ChatSession.query | |
| if user_id: | |
| query = query.filter(ChatSession.user_id == user_id) | |
| sessions = query.order_by(ChatSession.updated_at.desc())\ | |
| .paginate(page=page, per_page=per_page, error_out=False) | |
| sessions_data = [] | |
| for session in sessions.items: | |
| session_dict = session.to_dict() | |
| session_dict['username'] = session.user.username if session.user else 'Unknown' | |
| session_dict['nickname'] = session.user.nickname if session.user else None | |
| sessions_data.append(session_dict) | |
| return jsonify({ | |
| 'sessions': sessions_data, | |
| 'total': sessions.total, | |
| 'pages': sessions.pages, | |
| 'current_page': page | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def delete_user(user_id): | |
| """μ¬μ©μ μμ API""" | |
| try: | |
| user = User.query.get_or_404(user_id) | |
| # μκΈ° μμ μ μμ νλ κ²μ λ°©μ§ | |
| if user_id == current_user.id: | |
| return jsonify({'error': 'μκΈ° μμ μ μμ ν μ μμ΅λλ€.'}), 400 | |
| db.session.delete(user) | |
| db.session.commit() | |
| return jsonify({'message': 'μ¬μ©μκ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.'}), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'μ¬μ©μ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_gemini_api_key(): | |
| """Gemini API ν€ μ‘°ν""" | |
| try: | |
| # SystemConfigμμ API ν€ κ°μ Έμ€κΈ° (ν μ΄λΈμ΄ μμΌλ©΄ λΉ λ¬Έμμ΄ λ°ν) | |
| api_key = SystemConfig.get_config('gemini_api_key', '') | |
| # 보μμ μν΄ λ§μ€νΉλ κ° λ°ν (μ²μ 8μλ§ νμ) | |
| masked_key = api_key[:8] + '...' if api_key and len(api_key) > 8 else '' | |
| return jsonify({ | |
| 'has_api_key': bool(api_key), | |
| 'masked_key': masked_key | |
| }), 200 | |
| except Exception as e: | |
| print(f"[Gemini API ν€ μ‘°ν] μ€λ₯: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'API ν€ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_huggingface_token(): | |
| """Hugging Face ν ν° μ‘°ν""" | |
| try: | |
| from app.huggingface_client import get_huggingface_token | |
| token = get_huggingface_token() | |
| # 보μμ μν΄ λ§μ€νΉλ κ° λ°ν (μ²μ 8μλ§ νμ) | |
| masked_token = token[:8] + '...' if token and len(token) > 8 else '' | |
| return jsonify({ | |
| 'has_token': bool(token), | |
| 'masked_token': masked_token | |
| }), 200 | |
| except Exception as e: | |
| print(f"[Hugging Face ν ν° μ‘°ν] μ€λ₯: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'ν ν° μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_model_tokens(): | |
| """λͺ¨λ λͺ¨λΈμ ν ν° μ μ€μ μ‘°ν (μ λ ₯/μΆλ ₯ λΆλ¦¬)""" | |
| try: | |
| # Ollama λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° | |
| ollama_models = [] | |
| try: | |
| response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5) | |
| if response.status_code == 200: | |
| data = response.json() | |
| ollama_models = [model['name'] for model in data.get('models', [])] | |
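| # μ°Έκ³ : /api/tags μλ΅μ {"models": [{"name": "gemma2:9b", ...}, ...]} ννλ‘ κ°μ  | |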
| except Exception as e: | |
| print(f"[ν ν° μ μ€μ ] Ollama λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| # Gemini λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° | |
| gemini_models = [] | |
| try: | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| gemini_models = gemini_client.get_available_models() | |
| gemini_models = [f'gemini:{m}' for m in gemini_models] | |
| except Exception as e: | |
| print(f"[ν ν° μ μ€μ ] Gemini λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| # λͺ¨λ λͺ¨λΈ λͺ©λ‘ | |
| all_models = ollama_models + gemini_models | |
| # κ° λͺ¨λΈμ ν ν° μ μ€μ κ°μ Έμ€κΈ° (μ λ ₯/μΆλ ₯/Parent Chunk λΆλ¦¬) | |
| model_input_tokens = {} | |
| model_output_tokens = {} | |
| model_parent_chunk_tokens = {} | |
| default_input_tokens = {} | |
| default_output_tokens = {} | |
| default_parent_chunk_tokens = {} | |
| # λͺ¨λΈλ³ κΈ°λ³Έκ° κ²°μ | |
| def get_default_token_for_model(model_name, token_type='output'): | |
| """λͺ¨λΈλ³ κΈ°λ³Έ ν ν° μ κ²°μ """ | |
| if not model_name: | |
| if token_type == 'parent_chunk': | |
| return 8192 | |
| return 2000 if token_type == 'output' else 100000 | |
| # Gemini λͺ¨λΈμ κ²½μ° | |
| if model_name.startswith('gemini:'): | |
| if token_type == 'parent_chunk': | |
| return 8192 # Parent Chunk κΈ°λ³Έκ° | |
| return 2000 if token_type == 'output' else 100000 # Gemini μ λ ₯ κΈ°λ³Έκ°μ λ νΌ | |
| # Ollama λͺ¨λΈμ κ²½μ° | |
| if token_type == 'parent_chunk': | |
| return 8192 # Parent Chunk κΈ°λ³Έκ° | |
| return 2000 if token_type == 'output' else 100000 # Ollama μ λ ₯ κΈ°λ³Έκ°λ λ νΌ | |
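| # μ°Έκ³  (μμ): μλμμ μ¬μ©νλ μ€μ  ν€λ "model_token_input_gemma2:9b", "model_token_parent_chunk_gemini:gemini-2.0-flash-exp"μ κ°μ ννμ | |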
| for model_name in all_models: | |
| # μ λ ₯ ν ν° μ€μ κ°μ Έμ€κΈ° | |
| input_config_key = f"model_token_input_{model_name}" | |
| input_token_value = SystemConfig.get_config(input_config_key) | |
| default_input_token = get_default_token_for_model(model_name, 'input') | |
| default_input_tokens[model_name] = default_input_token | |
| if input_token_value: | |
| try: | |
| model_input_tokens[model_name] = int(input_token_value) | |
| except (ValueError, TypeError): | |
| model_input_tokens[model_name] = None | |
| else: | |
| model_input_tokens[model_name] = None | |
| # μΆλ ₯ ν ν° μ€μ κ°μ Έμ€κΈ° | |
| output_config_key = f"model_token_output_{model_name}" | |
| output_token_value = SystemConfig.get_config(output_config_key) | |
| # νμ νΈνμ±: κΈ°μ‘΄ νμλ νμΈ | |
| if not output_token_value: | |
| old_config_key = f"model_token_{model_name}" | |
| output_token_value = SystemConfig.get_config(old_config_key) | |
| default_output_token = get_default_token_for_model(model_name, 'output') | |
| default_output_tokens[model_name] = default_output_token | |
| if output_token_value: | |
| try: | |
| model_output_tokens[model_name] = int(output_token_value) | |
| except (ValueError, TypeError): | |
| model_output_tokens[model_name] = None | |
| else: | |
| model_output_tokens[model_name] = None | |
| # Parent Chunk ν ν° μ€μ κ°μ Έμ€κΈ° | |
| parent_chunk_config_key = f"model_token_parent_chunk_{model_name}" | |
| parent_chunk_token_value = SystemConfig.get_config(parent_chunk_config_key) | |
| default_parent_chunk_token = get_default_token_for_model(model_name, 'parent_chunk') | |
| default_parent_chunk_tokens[model_name] = default_parent_chunk_token | |
| if parent_chunk_token_value: | |
| try: | |
| model_parent_chunk_tokens[model_name] = int(parent_chunk_token_value) | |
| except (ValueError, TypeError): | |
| model_parent_chunk_tokens[model_name] = None | |
| else: | |
| model_parent_chunk_tokens[model_name] = None | |
| return jsonify({ | |
| 'models': all_models, | |
| 'input_tokens': model_input_tokens, | |
| 'output_tokens': model_output_tokens, | |
| 'parent_chunk_tokens': model_parent_chunk_tokens, | |
| 'default_input_tokens': default_input_tokens, | |
| 'default_output_tokens': default_output_tokens, | |
| 'default_parent_chunk_tokens': default_parent_chunk_tokens | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'ν ν° μ μ€μ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def save_model_tokens(): | |
| """λͺ¨λΈλ³ ν ν° μ μ€μ μ μ₯ (μ λ ₯/μΆλ ₯ λΆλ¦¬, λλ μμ )""" | |
| try: | |
| data = request.json | |
| model_name = data.get('model_name', '').strip() | |
| token_type = data.get('token_type', 'output').strip() # 'input', 'output', λλ 'parent_chunk' | |
| tokens = data.get('tokens', None) | |
| if not model_name: | |
| return jsonify({'error': 'λͺ¨λΈλͺ μ μ λ ₯ν΄μ£ΌμΈμ.'}), 400 | |
| if token_type not in ['input', 'output', 'parent_chunk']: | |
| return jsonify({'error': 'ν ν° νμ μ "input", "output", λλ "parent_chunk"μ΄μ΄μΌ ν©λλ€.'}), 400 | |
| # tokensκ° Noneμ΄λ©΄ μ€μ μμ (κΈ°λ³Έκ° μ¬μ©) | |
| if tokens is None: | |
| try: | |
| config_key = f"model_token_{token_type}_{model_name}" | |
| config = SystemConfig.query.filter_by(key=config_key).first() | |
| if config: | |
| db.session.delete(config) | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': f'{model_name} λͺ¨λΈμ {token_type} ν ν° μ μ€μ μ΄ μμ λμμ΅λλ€. κΈ°λ³Έκ°μ μ¬μ©ν©λλ€.', | |
| 'model_name': model_name, | |
| 'token_type': token_type, | |
| 'tokens': None | |
| }), 200 | |
| else: | |
| # νμ νΈνμ±: κΈ°μ‘΄ νμλ μμ μλ (μΆλ ₯ ν ν°μΈ κ²½μ°) | |
| if token_type == 'output': | |
| old_config_key = f"model_token_{model_name}" | |
| old_config = SystemConfig.query.filter_by(key=old_config_key).first() | |
| if old_config: | |
| db.session.delete(old_config) | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': f'{model_name} λͺ¨λΈμ μΆλ ₯ ν ν° μ μ€μ μ΄ μμ λμμ΅λλ€. κΈ°λ³Έκ°μ μ¬μ©ν©λλ€.', | |
| 'model_name': model_name, | |
| 'token_type': token_type, | |
| 'tokens': None | |
| }), 200 | |
| return jsonify({ | |
| 'message': f'{model_name} λͺ¨λΈμ μ΄λ―Έ κΈ°λ³Έκ°μ μ¬μ©νκ³ μμ΅λλ€.', | |
| 'model_name': model_name, | |
| 'token_type': token_type, | |
| 'tokens': None | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'μ€μ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| try: | |
| tokens = int(tokens) | |
| if tokens < 1: | |
| return jsonify({'error': 'ν ν° μλ 1 μ΄μμ΄μ΄μΌ ν©λλ€.'}), 400 | |
| except (ValueError, TypeError): | |
| return jsonify({'error': 'ν ν° μλ μ μμ¬μΌ ν©λλ€.'}), 400 | |
| # SystemConfigμ μ μ₯ | |
| config_key = f"model_token_{token_type}_{model_name}" | |
| SystemConfig.set_config(config_key, str(tokens), f'{model_name} λͺ¨λΈ {token_type} ν ν° μ μ ν') | |
| return jsonify({ | |
| 'message': f'{model_name} λͺ¨λΈμ {token_type} ν ν° μκ° {tokens}λ‘ μ€μ λμμ΅λλ€.', | |
| 'model_name': model_name, | |
| 'token_type': token_type, | |
| 'tokens': tokens | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| print(f"[ν ν° μ μ μ₯] μ€λ₯: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'ν ν° μ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def set_gemini_api_key(): | |
| """Gemini API ν€ μ μ₯/μ λ°μ΄νΈ""" | |
| try: | |
| if not request.is_json: | |
| return jsonify({'error': 'Content-Typeμ΄ application/jsonμ΄ μλλλ€.'}), 400 | |
| data = request.json | |
| if not data: | |
| return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400 | |
| api_key = data.get('api_key', '').strip() | |
| if not api_key: | |
| return jsonify({'error': 'API ν€λ₯Ό μ λ ₯ν΄μ£ΌμΈμ.'}), 400 | |
| # API ν€ μ μ₯ (SystemConfig.set_config λ΄λΆμμ ν μ΄λΈ μμ± μ²λ¦¬) | |
| try: | |
| SystemConfig.set_config( | |
| key='gemini_api_key', | |
| value=api_key, | |
| description='Google Gemini API ν€' | |
| ) | |
| # μ μ₯ νμΈ | |
| saved_key = SystemConfig.get_config('gemini_api_key', '') | |
| if saved_key == api_key: | |
| print(f"[Gemini API ν€ μ μ₯] μ±κ³΅: μ μ₯ νμΈλ¨ (κΈΈμ΄: {len(api_key)}μ)") | |
| else: | |
| print(f"[Gemini API ν€ μ μ₯] κ²½κ³ : μ μ₯ ν νμΈ μ€ν¨. μ μ₯λ κ°: {saved_key[:20] if saved_key else 'None'}...") | |
| except Exception as save_error: | |
| print(f"[Gemini API ν€ μ μ₯] μ€λ₯: {save_error}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'API ν€ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(save_error)}'}), 500 | |
| # Gemini ν΄λΌμ΄μΈνΈμ API ν€ μ¬λ‘λ μλ¦Ό | |
| try: | |
| from app.gemini_client import reset_gemini_client | |
| reset_gemini_client() | |
| print(f"[Gemini] API ν€κ° μ λ°μ΄νΈλμ΄ ν΄λΌμ΄μΈνΈκ° μ¬λ‘λλμμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[Gemini] API ν€ μ¬λ‘λ μ€ν¨: {e}") | |
| # μ΅μ’ νμΈ: DBμμ μ€μ λ‘ μ μ₯λμλμ§ νμΈ | |
| final_check = SystemConfig.get_config('gemini_api_key', '') | |
| if not final_check: | |
| print(f"[Gemini API ν€ μ μ₯] κ²½κ³ : μ μ₯ ν DBμμ μ‘°ν μ€ν¨") | |
| return jsonify({ | |
| 'error': 'API ν€ μ μ₯ ν νμΈμ μ€ν¨νμ΅λλ€. λ°μ΄ν°λ² μ΄μ€ μ°κ²°μ νμΈνμΈμ.', | |
| 'saved': False | |
| }), 500 | |
| return jsonify({ | |
| 'message': 'Gemini API ν€κ° μ±κ³΅μ μΌλ‘ μ μ₯λμμ΅λλ€.', | |
| 'has_api_key': True, | |
| 'masked_key': api_key[:8] + '...' if api_key and len(api_key) > 8 else '', | |
| 'saved': True, | |
| 'config_count': SystemConfig.query.count() # νμ¬ μ€μ κ°μ λ°ν | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| print(f"[Gemini API ν€ μ μ₯] μ€λ₯: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'API ν€ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def set_huggingface_token(): | |
| """Hugging Face ν ν° μ μ₯/μ λ°μ΄νΈ""" | |
| try: | |
| if not request.is_json: | |
| return jsonify({'error': 'Content-Typeμ΄ application/jsonμ΄ μλλλ€.'}), 400 | |
| data = request.json | |
| if not data: | |
| return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400 | |
| token = data.get('token', '').strip() | |
| if not token: | |
| return jsonify({'error': 'ν ν°μ μ λ ₯ν΄μ£ΌμΈμ.'}), 400 | |
| # ν ν° μ μ₯ (SystemConfig.set_config λ΄λΆμμ ν μ΄λΈ μμ± μ²λ¦¬) | |
| SystemConfig.set_config( | |
| key='huggingface_token', | |
| value=token, | |
| description='Hugging Face API ν ν°' | |
| ) | |
| # Hugging Face ν΄λΌμ΄μΈνΈμ ν ν° μ¬λ‘λ μλ¦Ό | |
| try: | |
| from app.huggingface_client import reset_huggingface_token | |
| reset_huggingface_token() | |
| print(f"[Hugging Face] ν ν°μ΄ μ λ°μ΄νΈλμ΄ ν΄λΌμ΄μΈνΈκ° μ¬λ‘λλμμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[Hugging Face] ν ν° μ¬λ‘λ μ€ν¨: {e}") | |
| return jsonify({ | |
| 'message': 'Hugging Face ν ν°μ΄ μ±κ³΅μ μΌλ‘ μ μ₯λμμ΅λλ€.', | |
| 'has_token': True | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| print(f"[Hugging Face ν ν° μ μ₯] μ€λ₯: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'ν ν° μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_ollama_models(): | |
| """Ollama λ° Geminiμμ μ¬μ© κ°λ₯ν λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° (λ‘컬 AI λͺ¨λΈμ νμ΅λ μΉμμ€μ΄ μλ λͺ¨λΈλ§ νμ)""" | |
| try: | |
| # 쿼리 νλΌλ―Έν°λ‘ all=trueκ° μ λ¬λλ©΄ λͺ¨λ λͺ¨λΈ λ°ν | |
| show_all = request.args.get('all', 'false').lower() == 'true' | |
| all_models = [] | |
| # 1. Ollama λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° | |
| try: | |
| response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5) | |
| if response.status_code == 200: | |
| data = response.json() | |
| ollama_models_raw = [model['name'] for model in data.get('models', [])] | |
| if show_all: | |
| # λͺ¨λ Ollama λͺ¨λΈ λ°ν | |
| ollama_models = [{'name': model_name, 'type': 'ollama'} for model_name in ollama_models_raw] | |
| all_models.extend(ollama_models) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models)}κ° μΆκ° (μ 체 λͺ©λ‘)") | |
| else: | |
| # νμ΅λ μΉμμ€μ΄ μλ λͺ¨λΈλ§ νν°λ§ | |
| filtered_ollama_models = [] | |
| for model_name in ollama_models_raw: | |
| # ν΄λΉ λͺ¨λΈλ‘ νμ΅λ μλ³Έ νμΌμ΄ μλμ§ νμΈ (parent_file_idκ° NoneμΈ νμΌλ§) | |
| file_count = UploadedFile.query.filter_by( | |
| model_name=model_name, | |
| parent_file_id=None | |
| ).count() | |
| if file_count > 0: | |
| filtered_ollama_models.append({'name': model_name, 'type': 'ollama'}) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ {file_count}κ°") | |
| else: | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ μμ, λͺ©λ‘μμ μ μΈ") | |
| all_models.extend(filtered_ollama_models) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(filtered_ollama_models)}κ° μΆκ° (μ 체 {len(ollama_models_raw)}κ° μ€ {len(filtered_ollama_models)}κ° νν°λ§λ¨)") | |
| except Exception as e: | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| # 2. Gemini λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° | |
| try: | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| gemini_models = gemini_client.get_available_models() | |
| if show_all: | |
| # λͺ¨λ Gemini λͺ¨λΈ λ°ν | |
| gemini_models_list = [{'name': f'gemini:{model_name}', 'type': 'gemini'} for model_name in gemini_models] | |
| all_models.extend(gemini_models_list) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_models_list)}κ° μΆκ° (μ 체 λͺ©λ‘)") | |
| else: | |
| # νμ΅λ μΉμμ€μ΄ μλ λͺ¨λΈλ§ νν°λ§ | |
| filtered_gemini_models = [] | |
| for model_name in gemini_models: | |
| full_model_name = f'gemini:{model_name}' | |
| # ν΄λΉ λͺ¨λΈλ‘ νμ΅λ μλ³Έ νμΌμ΄ μλμ§ νμΈ (parent_file_idκ° NoneμΈ νμΌλ§) | |
| file_count = UploadedFile.query.filter_by( | |
| model_name=full_model_name, | |
| parent_file_id=None | |
| ).count() | |
| if file_count > 0: | |
| filtered_gemini_models.append({'name': full_model_name, 'type': 'gemini'}) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ {file_count}κ°") | |
| else: | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ μμ, λͺ©λ‘μμ μ μΈ") | |
| all_models.extend(filtered_gemini_models) | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(filtered_gemini_models)}κ° μΆκ° (μ 체 {len(gemini_models)}κ° μ€ {len(filtered_gemini_models)}κ° νν°λ§λ¨)") | |
| else: | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini API ν€κ° μ€μ λμ§ μμ Gemini λͺ¨λΈμ λΆλ¬μ¬ μ μμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| if all_models: | |
| return jsonify({'models': all_models}) | |
| else: | |
| return jsonify({'error': 'μ¬μ© κ°λ₯ν λͺ¨λΈμ΄ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§, λλ Gemini API ν€κ° μ€μ λμλμ§ νμΈνμΈμ.', 'models': []}), 500 | |
| except Exception as e: | |
| return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'models': []}), 500 | |
| def get_system_prompt(): | |
| """μμ€ν ν둬ννΈ κ°μ Έμ€κΈ°""" | |
| try: | |
| prompt = SystemConfig.get_config('system_prompt', '') | |
| return jsonify({'prompt': prompt}), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'ν둬ννΈλ₯Ό κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def save_system_prompt(): | |
| """μμ€ν ν둬ννΈ μ μ₯""" | |
| try: | |
| data = request.json | |
| prompt = data.get('prompt', '').strip() | |
| SystemConfig.set_config( | |
| key='system_prompt', | |
| value=prompt, | |
| description='μ§λ¬Έν λ μλμΌλ‘ λΆμ΄λ μμ€ν ν둬ννΈ' | |
| ) | |
| return jsonify({ | |
| 'message': 'ν둬ννΈκ° μ±κ³΅μ μΌλ‘ μ μ₯λμμ΅λλ€.', | |
| 'prompt': prompt | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'ν둬ννΈ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_database_status(): | |
| """λ°μ΄ν°λ² μ΄μ€ μ°κ²° μν νμΈ""" | |
| try: | |
| from flask import current_app | |
| from sqlalchemy import create_engine, text | |
| from datetime import datetime | |
| db_uri = current_app.config['SQLALCHEMY_DATABASE_URI'] | |
| is_postgresql = db_uri.startswith('postgresql://') or db_uri.startswith('postgres://') | |
| # μ°κ²° μ 보 (보μμ μν΄ λΉλ°λ²νΈ λ§μ€νΉ) | |
| if is_postgresql and '@' in db_uri: | |
| masked_uri = db_uri.split('@')[0].split('://')[0] + '://***@' + '@'.join(db_uri.split('@')[1:]) | |
| else: | |
| masked_uri = db_uri | |
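| # λ§μ€νΉ μ (κ°μ μμ): "postgresql://user:pass@host:5432/db" -> "postgresql://***@host:5432/db" | |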
| status = { | |
| 'connected': False, | |
| 'type': 'PostgreSQL' if is_postgresql else 'SQLite', | |
| 'uri_masked': masked_uri, | |
| 'version': None, | |
| 'error': None, | |
| 'test_query': None, | |
| 'table_count': 0, | |
| 'user_count': 0, | |
| 'config_count': 0 | |
| } | |
| # μ°κ²° ν μ€νΈ | |
| try: | |
| if is_postgresql: | |
| # PostgreSQL μ°κ²° ν μ€νΈ | |
| engine = create_engine(db_uri) | |
| with engine.connect() as conn: | |
| # λ²μ νμΈ | |
| result = conn.execute(text("SELECT version()")) | |
| version = result.fetchone()[0] | |
| status['version'] = version[:100] # μ²μ 100μλ§ | |
| # ν μ΄λΈ κ°μ νμΈ | |
| result = conn.execute(text(""" | |
| SELECT COUNT(*) | |
| FROM information_schema.tables | |
| WHERE table_schema = 'public' | |
| """)) | |
| status['table_count'] = result.fetchone()[0] | |
| # μ¬μ©μ κ°μ νμΈ | |
| result = conn.execute(text("SELECT COUNT(*) FROM \"user\"")) | |
| status['user_count'] = result.fetchone()[0] | |
| # μ€μ κ°μ νμΈ | |
| result = conn.execute(text("SELECT COUNT(*) FROM system_config")) | |
| status['config_count'] = result.fetchone()[0] | |
| # ν μ€νΈ 쿼리 | |
| result = conn.execute(text("SELECT NOW()")) | |
| test_time = result.fetchone()[0] | |
| status['test_query'] = f"νμ¬ μκ°: {test_time}" | |
| status['connected'] = True | |
| else: | |
| # SQLite μ°κ²° ν μ€νΈ | |
| from sqlalchemy import inspect | |
| inspector = inspect(db.engine) | |
| tables = inspector.get_table_names() | |
| status['table_count'] = len(tables) | |
| # μ¬μ©μ κ°μ νμΈ | |
| user_count = User.query.count() | |
| status['user_count'] = user_count | |
| # μ€μ κ°μ νμΈ | |
| config_count = SystemConfig.query.count() | |
| status['config_count'] = config_count | |
| # SQLite λ²μ νμΈ | |
| with db.engine.connect() as conn: | |
| result = conn.execute(text("SELECT sqlite_version()")) | |
| version = result.fetchone()[0] | |
| status['version'] = f"SQLite {version}" | |
| # ν μ€νΈ 쿼리 | |
| result = conn.execute(text("SELECT datetime('now')")) | |
| test_time = result.fetchone()[0] | |
| status['test_query'] = f"νμ¬ μκ°: {test_time}" | |
| status['connected'] = True | |
| except Exception as e: | |
| status['error'] = str(e) | |
| status['connected'] = False | |
| return jsonify(status), 200 if status['connected'] else 500 | |
| except Exception as e: | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({ | |
| 'error': f'λ°μ΄ν°λ² μ΄μ€ μν νμΈ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', | |
| 'connected': False | |
| }), 500 | |
| def get_all_ollama_models(): | |
| """κ΄λ¦¬μμ©: Ollama λ° Geminiμμ μ¬μ© κ°λ₯ν λͺ¨λ λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° (νν°λ§ μμ΄ μ 체 λͺ©λ‘)""" | |
| try: | |
| all_models = [] | |
| # 1. Ollama λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° (νν°λ§ μμ΄ μ 체 λͺ©λ‘) | |
| try: | |
| response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5) | |
| if response.status_code == 200: | |
| data = response.json() | |
| ollama_models_raw = [model['name'] for model in data.get('models', [])] | |
| # νν°λ§ μμ΄ λͺ¨λ Ollama λͺ¨λΈ μΆκ° | |
| for model_name in ollama_models_raw: | |
| # κ° λͺ¨λΈμ νμ΅λ μΉμμ€ κ°μ νμΈ (μ 보 μ 곡μ©) | |
| file_count = UploadedFile.query.filter_by( | |
| model_name=model_name, | |
| parent_file_id=None | |
| ).count() | |
| all_models.append({ | |
| 'name': model_name, | |
| 'type': 'ollama', | |
| 'file_count': file_count # μ 보 μ κ³΅μ© | |
| }) | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ {file_count}κ°") | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models_raw)}κ° μΆκ°") | |
| except Exception as e: | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| # 2. Gemini λͺ¨λΈ λͺ©λ‘ κ°μ Έμ€κΈ° (νν°λ§ μμ΄ μ 체 λͺ©λ‘) | |
| try: | |
| gemini_client = get_gemini_client() | |
| if gemini_client.is_configured(): | |
| gemini_models = gemini_client.get_available_models() | |
| # νν°λ§ μμ΄ λͺ¨λ Gemini λͺ¨λΈ μΆκ° | |
| for model_name in gemini_models: | |
| full_model_name = f'gemini:{model_name}' | |
| # κ° λͺ¨λΈμ νμ΅λ μΉμμ€ κ°μ νμΈ (μ 보 μ 곡μ©) | |
| file_count = UploadedFile.query.filter_by( | |
| model_name=full_model_name, | |
| parent_file_id=None | |
| ).count() | |
| all_models.append({ | |
| 'name': full_model_name, | |
| 'type': 'gemini', | |
| 'file_count': file_count # μ 보 μ κ³΅μ© | |
| }) | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ {file_count}κ°") | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_models)}κ° μΆκ°") | |
| else: | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini API ν€κ° μ€μ λμ§ μμ Gemini λͺ¨λΈμ λΆλ¬μ¬ μ μμ΅λλ€.") | |
| except Exception as e: | |
| print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}") | |
| if all_models: | |
| return jsonify({'models': all_models}) | |
| else: | |
| return jsonify({'error': 'μ¬μ© κ°λ₯ν λͺ¨λΈμ΄ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§, λλ Gemini API ν€κ° μ€μ λμλμ§ νμΈνμΈμ.', 'models': []}), 500 | |
| except Exception as e: | |
| return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'models': []}), 500 | |
| def chat(): | |
| """μ±ν API μλν¬μΈνΈ""" | |
| try: | |
| data = request.json | |
| message = data.get('message', '') | |
| # νμ νΈνμ±μ μν΄ modelλ νμΈ (κΈ°μ‘΄ μ½λ) | |
| analysis_model = data.get('analysis_model', data.get('model', '')) # μ§λ¬Έ λΆμμ© λͺ¨λΈ | |
| answer_model = data.get('answer_model', '') # μ΅μ’ λ΅λ³μ© λͺ¨λΈ | |
| file_ids = [int(fid) for fid in data.get('file_ids', []) if fid] # μ νν μΉμμ€ νμΌ ID λͺ©λ‘ | |
| session_id = data.get('session_id', None) # λν μΈμ  ID | |
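| # μμ² λ°λ μ (κ°μ μ κΈ°λ° μ€μΌμΉ): {"message": "...", "analysis_model": "gemma2:9b", "answer_model": "gemini:gemini-2.0-flash-exp", "file_ids": [1, 2], "session_id": 3} | |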
| if not message: | |
| return jsonify({'error': 'λ©μμ§κ° νμν©λλ€.'}), 400 | |
| # λ΅λ³μ© λͺ¨λΈμ΄ μμΌλ©΄ λΆμμ© λͺ¨λΈ μ¬μ© (νμ νΈνμ±) | |
| if not answer_model: | |
| answer_model = analysis_model | |
| # λ΅λ³μ© λͺ¨λΈμ΄ μ¬μ ν μμΌλ©΄ μλ¬ λ°ν | |
| if not answer_model: | |
| return jsonify({'error': 'λ΅λ³μ μμ±ν AI λͺ¨λΈμ΄ μ νλμ§ μμμ΅λλ€. "μ¬μ© κ°λ₯ν AI λͺ©λ‘"μμ λ΅λ³μ μμ±ν AI λͺ¨λΈμ μ νν΄μ£ΌμΈμ.'}), 400 | |
| # λΆμμ© λͺ¨λΈμ΄ μ νλ κ²½μ° RAG κ²μ μ§ν | |
| if analysis_model: | |
| try: | |
| # RAG: μ§λ¬Έκ³Ό κ΄λ ¨λ μ²ν¬ κ²μ | |
| context = "" | |
| use_rag = True # RAG μ¬μ© μ¬λΆ | |
| if use_rag: | |
| print(f"\n[RAG κ²μ] λΆμ λͺ¨λΈ: {analysis_model}, λ΅λ³ λͺ¨λΈ: {answer_model}, μ§λ¬Έ: {message[:50]}...") | |
| print(f"[RAG κ²μ] μ νλ νμΌ ID: {file_ids if file_ids else 'μμ (λͺ¨λ νμΌ κ²μ)'}") | |
| # 1λ¨κ³: νμ°¨λ³ λΆμ(EpisodeAnalysis) μ‘°ν (νμ°¨λ³ μμ½ μ°Έμ‘°μ©) | |
| episode_analyses = [] | |
| if file_ids: | |
| print(f"[RAG κ²μ 1λ¨κ³] νμ°¨λ³ λΆμ μ‘°ν μμ...") | |
| episode_analyses = get_episode_analyses_for_files(file_ids) | |
| print(f"[RAG κ²μ 1λ¨κ³] νμ°¨λ³ λΆμ μ‘°ν μλ£: {len(episode_analyses)}κ° νμΌ") | |
| # 2λ¨κ³: GraphRAG λ°μ΄ν° μ‘°ν (μν°ν°, κ΄κ³, μ¬κ±΄) | |
| graph_data = None | |
| if file_ids: | |
| print(f"[RAG κ²μ 2λ¨κ³] GraphRAG λ°μ΄ν° μ‘°ν μμ...") | |
| graph_data = get_relevant_graph_data( | |
| query=message, | |
| file_ids=file_ids | |
| ) | |
| print(f"[RAG κ²μ 2λ¨κ³] GraphRAG λ°μ΄ν° μ‘°ν μλ£: μν°ν° {len(graph_data['entities'])}κ°, κ΄κ³ {len(graph_data['relationships'])}κ°, μ¬κ±΄ {len(graph_data['events'])}κ°") | |
| # 3λ¨κ³: λ²‘ν° κ²μ + 리λνΉμΌλ‘ Child Chunk μ λ° κ²μ (λΆμ λͺ¨λΈ μ¬μ©) | |
| print(f"[RAG κ²μ 3λ¨κ³] λ²‘ν° κ²μ + 리λνΉ μμ (λΆμ λͺ¨λΈ: {analysis_model})...") | |
| relevant_chunks = search_relevant_chunks( | |
| query=message, | |
| file_ids=file_ids if file_ids else None, | |
| model_name=analysis_model, # μ§λ¬Έ λΆμμ analysis_model μ¬μ© | |
| top_k=5, # 리λνΉ ν μμ 5κ°λ§ μ ν | |
| min_score=0.5 # μ΅μ μ μ μκ³κ° | |
| ) | |
| print(f"[RAG κ²μ 3λ¨κ³] λ²‘ν° κ²μ + 리λνΉ μλ£: {len(relevant_chunks)}κ° μ²ν¬ (μμ 5κ°)") | |
| # 컨ν μ€νΈ κ΅¬μ± | |
| context_parts = [] | |
| # GraphRAG λ°μ΄ν° μΆκ° (μν°ν°, κ΄κ³, μ¬κ±΄ μ 보) | |
| if graph_data and (graph_data['entities'] or graph_data['relationships'] or graph_data['events']): | |
| graph_context_parts = [] | |
| # μν°ν° μ 보 | |
| if graph_data['entities']: | |
| entity_sections = {} | |
| for entity in graph_data['entities']: | |
| episode = entity.get('episode_title', 'κΈ°ν') | |
| if episode not in entity_sections: | |
| entity_sections[episode] = {'characters': [], 'locations': []} | |
| if entity.get('entity_type') == 'character': | |
| entity_sections[episode]['characters'].append(entity) | |
| elif entity.get('entity_type') == 'location': | |
| entity_sections[episode]['locations'].append(entity) | |
| entity_text = "λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ λ±μ₯μΈλ¬Ό λ° μ₯μ μ 보μ λλ€:\n\n" | |
| for episode, entities in entity_sections.items(): | |
| entity_text += f"=== {episode} ===\n" | |
| if entities['characters']: | |
| entity_text += "μΈλ¬Ό:\n" | |
| for char in entities['characters']: | |
| entity_text += f"- {char.get('entity_name', '')}" | |
| if char.get('role'): | |
| entity_text += f" (μν : {char.get('role')})" | |
| if char.get('description'): | |
| entity_text += f": {char.get('description')}" | |
| entity_text += "\n" | |
| if entities['locations']: | |
| entity_text += "μ₯μ:\n" | |
| for loc in entities['locations']: | |
| entity_text += f"- {loc.get('entity_name', '')}" | |
| if loc.get('category'): | |
| entity_text += f" (μ ν: {loc.get('category')})" | |
| if loc.get('description'): | |
| entity_text += f": {loc.get('description')}" | |
| entity_text += "\n" | |
| entity_text += "\n" | |
| graph_context_parts.append(entity_text) | |
| # κ΄κ³ μ 보 | |
| if graph_data['relationships']: | |
| rel_sections = {} | |
| for rel in graph_data['relationships']: | |
| episode = rel.get('episode_title', 'κΈ°ν') | |
| if episode not in rel_sections: | |
| rel_sections[episode] = [] | |
| rel_sections[episode].append(rel) | |
| rel_text = "λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μΈλ¬Ό/μ₯μ κ°μ κ΄κ³ μ 보μ λλ€:\n\n" | |
| for episode, rels in rel_sections.items(): | |
| rel_text += f"=== {episode} ===\n" | |
| for rel in rels: | |
| rel_text += f"- {rel.get('source', '')} β {rel.get('target', '')}" | |
| if rel.get('relationship_type'): | |
| rel_text += f" ({rel.get('relationship_type')})" | |
| if rel.get('description'): | |
| rel_text += f": {rel.get('description')}" | |
| if rel.get('event'): | |
| rel_text += f" [κ΄λ ¨ μ¬κ±΄: {rel.get('event')}]" | |
| rel_text += "\n" | |
| rel_text += "\n" | |
| graph_context_parts.append(rel_text) | |
| # μ¬κ±΄ μ 보 | |
| if graph_data['events']: | |
| event_sections = {} | |
| for event in graph_data['events']: | |
| episode = event.get('episode_title', 'κΈ°ν') | |
| if episode not in event_sections: | |
| event_sections[episode] = [] | |
| event_sections[episode].append(event) | |
| event_text = "λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μ£Όμ μ¬κ±΄ μ 보μ λλ€:\n\n" | |
| for episode, events in event_sections.items(): | |
| event_text += f"=== {episode} ===\n" | |
| for event in events: | |
| if event.get('event_name'): | |
| event_text += f"- {event.get('event_name')}\n" | |
| if event.get('description'): | |
| event_text += f" μ€λͺ : {event.get('description')}\n" | |
| if event.get('participants') and len(event.get('participants', [])) > 0: | |
| event_text += f" κ΄λ ¨ μΈλ¬Ό: {', '.join(event.get('participants', []))}\n" | |
| if event.get('location'): | |
| event_text += f" μ₯μ: {event.get('location')}\n" | |
| if event.get('significance'): | |
| event_text += f" μ€μλ: {event.get('significance')}\n" | |
| event_text += "\n" | |
| graph_context_parts.append(event_text) | |
| if graph_context_parts: | |
| graph_context = "\n\n".join(graph_context_parts) | |
| context_parts.append(f"λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ GraphRAG λ°μ΄ν°μ λλ€ (μν°ν°, κ΄κ³, μ¬κ±΄ μ 보):\n\n{graph_context}") | |
| print(f"[RAG κ²μ] GraphRAG 컨ν μ€νΈ μΆκ°: {len(graph_context)}μ") | |
| # νμ°¨λ³ λΆμ μ 보 μΆκ° (νμ°¨λ³ μμ½ μ°Έμ‘°μ©) | |
| if episode_analyses: | |
| episode_context_sections = [] | |
| for episode_analysis in episode_analyses: | |
| file = episode_analysis.file | |
| file_info = f"\n=== {file.original_filename} νμ°¨λ³ λΆμ ===\n" | |
| if episode_analysis.analysis_content: | |
| episode_context_sections.append(file_info + episode_analysis.analysis_content) | |
| if episode_context_sections: | |
| episode_context = "\n\n".join(episode_context_sections) | |
| context_parts.append(f"λ€μμ μΉμμ€μ νμ°¨λ³ μμΈ λΆμ λ΄μ©μ λλ€:\n\n{episode_context}") | |
| print(f"[RAG κ²μ] νμ°¨λ³ λΆμ 컨ν μ€νΈ μΆκ°: {len(episode_context)}μ") | |
| # Child Chunk μ 보 μΆκ° (μ λ° κ²μ κ²°κ³Ό) | |
| if relevant_chunks: | |
| child_context_parts = [] | |
| seen_files = set() | |
| for chunk in relevant_chunks: | |
| file = chunk.file | |
| if file.original_filename not in seen_files: | |
| seen_files.add(file.original_filename) | |
| print(f"[RAG κ²μ] μ¬μ©λ νμΌ: {file.original_filename} (λͺ¨λΈ: {file.model_name})") | |
| child_context_parts.append(f"[{file.original_filename} - μ²ν¬ {chunk.chunk_index + 1}]\n{chunk.content}") | |
| if child_context_parts: | |
| # 컨ν μ€νΈ κΈΈμ΄ νμΈ λ° μ΅μ ν | |
| full_child_context = "\n\n".join(child_context_parts) | |
| child_context_length = len(full_child_context) | |
| # Child Chunk 컨ν μ€νΈκ° λ무 κΈΈλ©΄ μΌλΆλ§ μ¬μ© (μ΅λ 15000μ) | |
| if child_context_length > 15000: | |
| truncated_parts = [] | |
| current_length = 0 | |
| for part in child_context_parts: | |
| if current_length + len(part) > 15000: | |
| break | |
| truncated_parts.append(part) | |
| current_length += len(part) | |
| full_child_context = "\n\n".join(truncated_parts) | |
| print(f"[RAG κ²μ] Child Chunk 컨ν μ€νΈ κΈΈμ΄ μ‘°μ : {child_context_length}μ β {len(full_child_context)}μ") | |
| context_parts.append(f"λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μΉμμ€μ ꡬ체μ μΈ λ΄μ©μ λλ€ (μ λ° κ²μ κ²°κ³Ό, μ΄ {len(relevant_chunks)}κ° μ²ν¬):\n\n{full_child_context}") | |
| print(f"[RAG κ²μ] Child Chunk 컨ν μ€νΈ μΆκ°: {len(full_child_context)}μ") | |
| # μ΅μ’ 컨ν μ€νΈ κ΅¬μ± | |
| if context_parts: | |
| full_context = "\n\n" + "\n\n---\n\n".join(context_parts) + "\n\n" | |
| # νμ°¨λ³ λΆμ, GraphRAG, Child Chunk λͺ¨λ μλ κ²½μ° | |
| has_graph = graph_data and (graph_data['entities'] or graph_data['relationships'] or graph_data['events']) | |
| if episode_analyses and has_graph and relevant_chunks: | |
| context = f"""λ€μμ μ§λ¬Έμ λ΅νκΈ° μν μΉμμ€ μ 보μ λλ€: | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ λ΅λ³ν΄μ£ΌμΈμ: | |
| - λ¨Όμ GraphRAG λ°μ΄ν°(μν°ν°, κ΄κ³, μ¬κ±΄)λ₯Ό νμΈνμ¬ λ±μ₯μΈλ¬Ό, μ₯μ, μΈλ¬Ό κ°μ κ΄κ³, μ£Όμ μ¬κ±΄μ νμ νμΈμ. | |
| - κ·Έ λ€μ νμ°¨λ³ λΆμ λ΄μ©μ μ΄ν΄νμ¬ κ° νμ°¨μ μ£Όμ μ€ν 리, λ±μ₯ μΈλ¬Ό, μΈλ¬Ό κ΄κ³ λ³νλ₯Ό νμ νμΈμ. | |
| - λ§μ§λ§μΌλ‘ ꡬ체μ μΈ λ΄μ©(Child Chunk)μ ν΅ν΄ μ§λ¬Έμ λν μ νν λ΅λ³μ μ 곡νμΈμ. | |
| - μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ μΌκ΄μ± μλ λ΅λ³μ μμ±νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| elif episode_analyses and relevant_chunks: | |
| context = f"""λ€μμ μ§λ¬Έμ λ΅νκΈ° μν μΉμμ€ μ 보μ λλ€: | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ λ΅λ³ν΄μ£ΌμΈμ: | |
| - λ¨Όμ νμ°¨λ³ λΆμ λ΄μ©μ μ΄ν΄νμ¬ κ° νμ°¨μ μ£Όμ μ€ν 리, λ±μ₯ μΈλ¬Ό, μΈλ¬Ό κ΄κ³ λ³νλ₯Ό νμ νμΈμ. | |
| - κ·Έ λ€μ ꡬ체μ μΈ λ΄μ©(Child Chunk)μ ν΅ν΄ μ§λ¬Έμ λν μ νν λ΅λ³μ μ 곡νμΈμ. | |
| - μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ μΌκ΄μ± μλ λ΅λ³μ μμ±νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| elif has_graph and relevant_chunks: | |
| context = f"""λ€μμ μ§λ¬Έμ λ΅νκΈ° μν μΉμμ€ μ 보μ λλ€: | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ λ΅λ³ν΄μ£ΌμΈμ: | |
| - λ¨Όμ GraphRAG λ°μ΄ν°(μν°ν°, κ΄κ³, μ¬κ±΄)λ₯Ό νμΈνμ¬ λ±μ₯μΈλ¬Ό, μ₯μ, μΈλ¬Ό κ°μ κ΄κ³, μ£Όμ μ¬κ±΄μ νμ νμΈμ. | |
| - κ·Έ λ€μ ꡬ체μ μΈ λ΄μ©(Child Chunk)μ ν΅ν΄ μ§λ¬Έμ λν μ νν λ΅λ³μ μ 곡νμΈμ. | |
| - μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ μΌκ΄μ± μλ λ΅λ³μ μμ±νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| elif episode_analyses and has_graph: | |
| # νμ°¨λ³ λΆμκ³Ό GraphRAGλ§ μλ κ²½μ° | |
| context = f"""λ€μμ μΉμμ€μ νμ°¨λ³ μμΈ λΆμ λ° GraphRAG λ°μ΄ν°μ λλ€: | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ: | |
| - GraphRAG λ°μ΄ν°(μν°ν°, κ΄κ³, μ¬κ±΄)λ₯Ό νμΈνμ¬ λ±μ₯μΈλ¬Ό, μ₯μ, μΈλ¬Ό κ°μ κ΄κ³, μ£Όμ μ¬κ±΄μ νμ νμΈμ. | |
| - νμ°¨λ³ λΆμ λ΄μ©μ μ΄ν΄νμ¬ κ° νμ°¨μ μ£Όμ μ€ν 리, λ±μ₯ μΈλ¬Ό, μΈλ¬Ό κ΄κ³ λ³νλ₯Ό κ³ λ €νμ¬ λ΅λ³νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| elif episode_analyses: | |
| # νμ°¨λ³ λΆμλ§ μλ κ²½μ° | |
| context = f"""λ€μμ μΉμμ€μ νμ°¨λ³ μμΈ λΆμ λ΄μ©μ λλ€: | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ. κ° νμ°¨μ μ£Όμ μ€ν 리, λ±μ₯ μΈλ¬Ό, μΈλ¬Ό κ΄κ³ λ³νλ₯Ό κ³ λ €νμ¬ λ΅λ³νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| elif has_graph: | |
| # GraphRAGλ§ μλ κ²½μ° | |
| context = f"""λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ GraphRAG λ°μ΄ν°μ λλ€ (μν°ν°, κ΄κ³, μ¬κ±΄ μ 보): | |
| {full_context} | |
| μ μ 보λ₯Ό μ°Έκ³ νμ¬ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ: | |
| - GraphRAG λ°μ΄ν°λ₯Ό νμΈνμ¬ λ±μ₯μΈλ¬Ό, μ₯μ, μΈλ¬Ό κ°μ κ΄κ³, μ£Όμ μ¬κ±΄μ νμ νμΈμ. | |
| - μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ μΌκ΄μ± μλ λ΅λ³μ μμ±νμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| else: | |
| # Child Chunkλ§ μλ κ²½μ° | |
| context = f"""λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μΉμμ€μ ꡬ체μ μΈ λ΄μ©μ λλ€: | |
| {full_context} | |
| μ λ΄μ©μ μΆ©λΆν μ°Έκ³ νμ¬ λ€μ μ§λ¬Έμ μ ννκ³ μμΈνκ² λ΅λ³ν΄μ£ΌμΈμ. μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ λ΅λ³ν΄μ£ΌμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| context += message | |
| graph_info = f", GraphRAG: {len(graph_data['entities']) if graph_data else 0}κ° μν°ν°, {len(graph_data['relationships']) if graph_data else 0}κ° κ΄κ³, {len(graph_data['events']) if graph_data else 0}κ° μ¬κ±΄" if graph_data else "" | |
| print(f"[RAG κ²μ] μ΅μ’ 컨ν μ€νΈ μμ± μλ£ (νμ°¨λ³ λΆμ: {len(episode_analyses)}κ°{graph_info}, Child Chunk: {len(relevant_chunks)}κ°, μ΄ {len(context)}μ)") | |
| else: | |
| # RAG κ²μ κ²°κ³Όκ° μμΌλ©΄ κΈ°μ‘΄ λ°©μ μ¬μ© | |
| print(f"[RAG κ²μ] κ΄λ ¨ μ²ν¬λ₯Ό μ°Ύμ§ λͺ»νμ΅λλ€. μ 체 νμΌ λ΄μ© μ¬μ©") | |
| use_rag = False | |
| # RAG κ²μ κ²°κ³Όκ° μκ±°λ λΉνμ±νλ κ²½μ° κΈ°μ‘΄ λ°©μ μ¬μ© | |
| if not context and not use_rag: | |
| if file_ids: | |
| # μ νν νμΌ IDμ μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| expanded_file_ids = list(file_ids) | |
| for file_id in file_ids: | |
| # μλ³Έ νμΌμΈ κ²½μ° μ΄μ΄μ μ λ‘λλ νμΌλ€λ ν¬ν¨ | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| expanded_file_ids.extend([child.id for child in child_files]) | |
| uploaded_files = UploadedFile.query.filter( | |
| UploadedFile.id.in_(expanded_file_ids), | |
| UploadedFile.model_name == analysis_model | |
| ).all() | |
| print(f"[νμΌ μ¬μ©] μ νλ νμΌ IDλ‘ μ‘°ν (μ΄μ΄μ μ λ‘λ ν¬ν¨): {len(uploaded_files)}κ° νμΌ") | |
| else: | |
| # νμΌ IDκ° μμΌλ©΄ ν΄λΉ λͺ¨λΈμ λͺ¨λ νμΌ μ¬μ© (μλ³Έ λ° μ΄μ΄μ μ λ‘λ ν¬ν¨) | |
| uploaded_files = UploadedFile.query.filter_by(model_name=analysis_model).all() | |
| print(f"[νμΌ μ¬μ©] λͺ¨λΈ '{analysis_model}'μ λͺ¨λ νμΌ μ¬μ©: {len(uploaded_files)}κ° νμΌ") | |
| if uploaded_files: | |
| print(f"[νμΌ μ¬μ©] μ¬μ©λλ νμΌ λͺ©λ‘:") | |
| for f in uploaded_files: | |
| is_child = f.parent_file_id is not None | |
| prefix = " ββ " if is_child else " - " | |
| print(f"{prefix}{f.original_filename} (λͺ¨λΈ: {f.model_name})") | |
| context_parts = [] | |
| for file in uploaded_files: | |
| try: | |
| if os.path.exists(file.file_path): | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| file_content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| file_content = f.read() | |
| # νμΌ λ΄μ©μ΄ λ무 κΈΈλ©΄ μΌλΆλ§ μ¬μ© (μ΅λ 20000μλ‘ μ¦κ°) | |
| if len(file_content) > 20000: | |
| file_content = file_content[:20000] + "..." | |
| context_parts.append(f"[{file.original_filename}]\n{file_content}") | |
| except Exception as e: | |
| print(f"νμΌ μ½κΈ° μ€λ₯ ({file.original_filename}): {str(e)}") | |
| continue | |
| if context_parts: | |
| context = "\n\n".join(context_parts) | |
| context = f"""λ€μμ νμ΅λ μΉμμ€ λ΄μ©μ λλ€: | |
| {context} | |
| μ λ΄μ©μ μ°Έκ³ νμ¬ λ€μ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ. | |
| μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€. | |
| λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ. | |
| κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ. | |
| μ§λ¬Έ: | |
| """ | |
| # μμ€ν ν둬ννΈ κ°μ Έμ€κΈ° | |
| system_prompt = SystemConfig.get_config('system_prompt', '').strip() | |
| # ν둬ννΈ κ΅¬μ± (μμ€ν ν둬ννΈ + 컨ν μ€νΈ + μ¬μ©μ λ©μμ§) | |
| prompt_parts = [] | |
| if system_prompt: | |
| prompt_parts.append(system_prompt) | |
| if context: | |
| prompt_parts.append(context) | |
| prompt_parts.append(message) | |
| full_prompt = "\n\n".join(prompt_parts) | |
| if system_prompt: | |
| print(f"[ν둬ννΈ] μμ€ν ν둬ννΈ μ μ©: {len(system_prompt)}μ") | |
| # μ΅μ’ λ΅λ³ μμ±μ answer_model μ¬μ© | |
| if not answer_model: | |
| return jsonify({'error': 'λ΅λ³μ© λͺ¨λΈμ΄ μ νλμ§ μμμ΅λλ€.'}), 400 | |
| # λͺ¨λΈ νμ νμΈ (Gemini λλ Ollama) | |
| is_gemini = answer_model.startswith('gemini:') | |
| print(f"[μ΅μ’ λ΅λ³ μμ±] λ΅λ³ λͺ¨λΈ: {answer_model}, ν둬ννΈ κΈΈμ΄: {len(full_prompt)}μ") | |
| if is_gemini: | |
| # Gemini API νΈμΆ | |
| gemini_model_name = answer_model.replace('gemini:', '') | |
| print(f"[Gemini] λͺ¨λΈ: {gemini_model_name}, μ§λ¬Έ: {message[:50]}...") | |
| gemini_client = get_gemini_client() | |
| if not gemini_client.is_configured(): | |
| return jsonify({'error': 'Gemini API ν€κ° μ€μ λμ§ μμμ΅λλ€. GEMINI_API_KEY νκ²½ λ³μλ₯Ό μ€μ νμΈμ.'}), 500 | |
| result = gemini_client.generate_response( | |
| prompt=full_prompt, | |
| model_name=gemini_model_name, | |
| temperature=0.7, | |
| max_output_tokens=get_model_token_limit(gemini_model_name or "gemini-1.5-flash", 8192) # μ μ₯λ ν ν° μ μ¬μ© | |
| ) | |
| if result['error']: | |
| return jsonify({'error': result['error']}), 500 | |
| response_text = result.get('response', '').strip() | |
| if not response_text: | |
| print(f"[μ±ν ] Gemini μλ΅μ΄ λΉμ΄μμ΅λλ€. result: {result}") | |
| response_text = 'μλ΅μ μμ±ν μ μμμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ.' | |
| else: | |
| # Ollama API νΈμΆ | |
| # Ollama μλ² μ°κ²° νμΈ | |
| try: | |
| # κ°λ¨ν μ°κ²° ν μ€νΈ | |
| test_response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5) | |
| if test_response.status_code != 200: | |
| return jsonify({'error': f'Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€. (μν μ½λ: {test_response.status_code}) Ollamaκ° μ€ν μ€μΈμ§ νμΈνμΈμ. Ollama URL: {OLLAMA_BASE_URL}'}), 503 | |
| except requests.exceptions.ConnectionError: | |
| return jsonify({'error': f'Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§ νμΈνμΈμ. Ollama URL: {OLLAMA_BASE_URL}'}), 503 | |
| except Exception as e: | |
| return jsonify({'error': f'Ollama μλ² μ°κ²° νμΈ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}. Ollama URL: {OLLAMA_BASE_URL}'}), 503 | |
| # μ λ ₯ ν ν° μλ₯Ό num_ctxλ‘ μ¬μ© | |
| num_ctx = get_model_token_limit_by_type(answer_model, 100000, 'input') | |
| num_predict = get_model_token_limit_by_type(answer_model, 8192, 'output') | |
| ollama_response = requests.post( | |
| f'{OLLAMA_BASE_URL}/api/generate', | |
| json={ | |
| 'model': answer_model, # λ΅λ³ λͺ¨λΈ μ¬μ© | |
| 'prompt': full_prompt, | |
| 'stream': False, | |
| 'options': { | |
| 'num_ctx': num_ctx, # μ λ ₯ ν ν° μλ₯Ό 컨ν μ€νΈ μλμ°λ‘ μ¬μ© | |
| 'num_predict': num_predict # μΆλ ₯ ν ν° μ | |
| } | |
| }, | |
| timeout=120 # νμΌμ΄ λ§μ μ μμΌλ―λ‘ νμμμ μ¦κ° | |
| ) | |
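| # μ°Έκ³ : stream=False μλ /api/generate μλ΅μ {"response": "...", "done": true, ...} ννλ‘ κ°μ  | |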
| if ollama_response.status_code != 200: | |
| # μ€λ₯ μμΈ μ 보 κ°μ Έμ€κΈ° | |
| try: | |
| error_detail = ollama_response.json().get('error', ollama_response.text[:200]) | |
| except Exception: | |
| error_detail = ollama_response.text[:200] if ollama_response.text else 'μμΈ μ 보 μμ' | |
| if ollama_response.status_code == 404: | |
| error_msg = f'λͺ¨λΈ "{answer_model}"μ(λ₯Ό) μ°Ύμ μ μμ΅λλ€. λͺ¨λΈμ΄ Ollamaμ μ€μΉλμ΄ μλμ§ νμΈνμΈμ. (μ€λ₯: {error_detail})' | |
| else: | |
| error_msg = f'Ollama μλ² μ€λ₯: {ollama_response.status_code} (μ€λ₯: {error_detail})' | |
| return jsonify({'error': error_msg}), ollama_response.status_code | |
| ollama_data = ollama_response.json() | |
| response_text = ollama_data.get('response', '').strip() | |
| if not response_text: | |
| print(f"[μ±ν ] Ollama μλ΅μ΄ λΉμ΄μμ΅λλ€. ollama_data: {ollama_data}") | |
| response_text = 'μλ΅μ μμ±ν μ μμμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ.' | |
| # λν μΈμ μ λ©μμ§ μ μ₯ (Geminiμ Ollama 곡ν΅) | |
| session_id = data.get('session_id') | |
| session_dict = None | |
| if session_id: | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first() | |
| if session: | |
| # μ¬μ©μ λ©μμ§κ° μ΄λ―Έ μ μ₯λμ΄ μλμ§ νμΈ (μ€λ³΅ λ°©μ§) | |
| # κ°μ₯ μ΅κ·Ό λ©μμ§λ₯Ό νμΈνμ¬ μ€λ³΅ μ μ₯ λ°©μ§ | |
| latest_user_msg = ChatMessage.query.filter_by( | |
| session_id=session_id, | |
| role='user' | |
| ).order_by(ChatMessage.created_at.desc()).first() | |
| # μ΅κ·Ό 10μ΄ μ΄λ΄μ κ°μ λ΄μ©μ λ©μμ§κ° μμΌλ©΄ μ μ₯ | |
| should_save = True | |
| if latest_user_msg: | |
| time_diff = (datetime.utcnow() - latest_user_msg.created_at).total_seconds() | |
| if latest_user_msg.content == message and time_diff < 10: | |
| should_save = False | |
| print(f"[μ€λ³΅ λ°©μ§] μ΅κ·Ό {time_diff:.2f}μ΄ μ μ κ°μ λ©μμ§κ° μ μ₯λμ΄ μμ΅λλ€. μ μ₯μ 건λλλλ€.") | |
| if should_save: | |
| user_msg = ChatMessage( | |
| session_id=session_id, | |
| role='user', | |
| content=message | |
| ) | |
| db.session.add(user_msg) | |
| print(f"[λ©μμ§ μ μ₯] μ¬μ©μ λ©μμ§ μ μ₯: {message[:50]}...") | |
| # μΈμ μ λͺ© μ λ°μ΄νΈ (첫 μ¬μ©μ λ©μμ§μΈ κ²½μ°) | |
| title_needs_update = ( | |
| not session.title or | |
| session.title.strip() == '' or | |
| session.title == 'μ λν' | |
| ) | |
| if title_needs_update and message.strip(): | |
| # λ©μμ§ λ΄μ©μ μ λͺ©μΌλ‘ μ¬μ© (μ΅λ 30μ) | |
| title = message.strip()[:30] | |
| if len(message.strip()) > 30: | |
| title += '...' | |
| session.title = title | |
| print(f"[μΈμ μ λͺ©] μ λ°μ΄νΈ: '{title}' (μλ³Έ κΈΈμ΄: {len(message.strip())}μ)") | |
| elif title_needs_update: | |
| print(f"[μΈμ μ λͺ©] λ©μμ§κ° λΉμ΄μμ΄ μ λͺ©μ μ λ°μ΄νΈνμ§ μμ΅λλ€.") | |
| else: | |
| print(f"[λ©μμ§ μ μ₯] μ€λ³΅ λ©μμ§λ‘ μΈν΄ μ μ₯μ 건λλλλ€.") | |
| # AI μλ΅ μ μ₯ | |
| ai_msg = ChatMessage( | |
| session_id=session_id, | |
| role='ai', | |
| content=response_text | |
| ) | |
| db.session.add(ai_msg) | |
| # μΈμ λͺ¨λΈ μ 보 μ λ°μ΄νΈ (첫 λ©μμ§μΈ κ²½μ° λλ λ³κ²½λ κ²½μ°) | |
| if not session.analysis_model or session.analysis_model != analysis_model: | |
| session.analysis_model = analysis_model | |
| if not session.answer_model or session.answer_model != answer_model: | |
| session.answer_model = answer_model | |
| # νμ νΈνμ±μ μν΄ model_nameλ μ λ°μ΄νΈ | |
| if not session.model_name: | |
| session.model_name = answer_model or analysis_model | |
| session.updated_at = datetime.utcnow() | |
| db.session.commit() | |
| # μΈμ μ 보λ₯Ό μλ΅μ ν¬ν¨ (μ λͺ© μ λ°μ΄νΈ λ°μ) | |
| session_dict = session.to_dict() | |
| except Exception as e: | |
| print(f"λ©μμ§ μ μ₯ μ€λ₯: {str(e)}") | |
| db.session.rollback() | |
| session_dict = None | |
| # μλ΅μ΄ λΉμ΄μμΌλ©΄ κΈ°λ³Έ λ©μμ§ μ¬μ© | |
| if not response_text or not response_text.strip(): | |
| print(f"[μ±ν ] μ΅μ’ μλ΅μ΄ λΉμ΄μμ΅λλ€. κΈ°λ³Έ λ©μμ§λ₯Ό μ¬μ©ν©λλ€.") | |
| response_text = 'μλ΅μ μμ±ν μ μμμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ.' | |
| print(f"[μ±ν ] μ΅μ’ μλ΅ κΈΈμ΄: {len(response_text)}μ, 미리보기: {response_text[:100]}...") | |
| response_data = {'response': response_text, 'session_id': session_id} | |
| if session_dict: | |
| response_data['session'] = session_dict | |
| return jsonify(response_data) | |
| except requests.exceptions.ConnectionError as e: | |
| error_msg = f'Cannot connect to the Ollama server. Make sure Ollama is running. (URL: {OLLAMA_BASE_URL})' | |
| print(f"[Chat] Ollama connection error: {str(e)}") | |
| return jsonify({'error': error_msg}), 503 | |
| except requests.exceptions.Timeout: | |
| return jsonify({'error': 'The response timed out. Try a shorter message.'}), 504 | |
| except Exception as e: | |
| error_msg = f'An error occurred while communicating with Ollama: {str(e)}' | |
| print(f"[Chat] Ollama communication error details: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg}), 500 | |
| else: | |
| # Default response when no model has been selected | |
| response_text = f"Hello! An answer to '{message}' is being prepared.\n\nSelect a local AI model at the bottom left to get more accurate answers." | |
| return jsonify({'response': response_text}) | |
| except Exception as e: | |
| return jsonify({'error': f'An error occurred while processing the chat: {str(e)}'}), 500 | |
| def upload_file(): | |
| """μΉμμ€ νμΌ μ λ‘λ""" | |
| import sys | |
| import traceback | |
| # Flush all output immediately so log lines show up right away | |
| def log_print(*args, **kwargs): | |
| from datetime import datetime | |
| timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] | |
| print(f"[{timestamp}]", *args, **kwargs) | |
| sys.stdout.flush() | |
| try: | |
| log_print(f"\n{'='*60}") | |
| log_print(f"=== νμΌ μ λ‘λ μμ² μμ ===") | |
| log_print(f"μμ² URL: {request.url}") | |
| log_print(f"μμ² λ©μλ: {request.method}") | |
| log_print(f"Content-Type: {request.content_type}") | |
| log_print(f"Content-Length: {request.content_length}") | |
| log_print(f"Remote Address: {request.remote_addr}") | |
| log_print(f"Headers: {dict(request.headers)}") | |
| log_print(f"Form λ°μ΄ν° ν€: {list(request.form.keys())}") | |
| log_print(f"Files ν€: {list(request.files.keys())}") | |
| log_print(f"μ¬μ©μ: {current_user.username if current_user and current_user.is_authenticated else 'None'}") | |
| log_print(f"μ¬μ©μ μΈμ¦ μν: {current_user.is_authenticated if current_user else False}") | |
| log_print(f"{'='*60}\n") | |
| # Ensure the upload folder exists, creating it if necessary | |
| try: | |
| ensure_upload_folder() | |
| log_print(f"[1/8] μ λ‘λ ν΄λ νμΈ μλ£: {UPLOAD_FOLDER}") | |
| except Exception as e: | |
| error_msg = f'Could not prepare the upload folder: {str(e)}' | |
| log_print(f"[ERROR] {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'folder_check'}), 500 | |
| if 'file' not in request.files: | |
| error_msg = 'No file was provided.' | |
| log_print(f"[ERROR] {error_msg}") | |
| log_print(f"Available keys: {list(request.files.keys())}") | |
| return jsonify({'error': error_msg, 'step': 'file_check'}), 400 | |
| file = request.files['file'] | |
| model_name = request.form.get('model_name', '').strip() | |
| parent_file_id = request.form.get('parent_file_id', None) # original file ID when continuing an upload | |
| log_print(f"[2/8] File received: {file.filename if file else 'None'}") | |
| log_print(f"[2/8] Model name: {model_name if model_name else 'None (empty)'}") | |
| log_print(f"[2/8] Continued upload: {parent_file_id if parent_file_id else 'No'}") | |
| if file.filename == '': | |
| error_msg = 'The filename is empty.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'filename_check'}), 400 | |
| # Validate the model name | |
| if not model_name: | |
| error_msg = 'Please select an AI model.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'model_check'}), 400 | |
| # Validate parent_file_id (when continuing an upload) | |
| parent_file = None | |
| if parent_file_id: | |
| try: | |
| parent_file_id = int(parent_file_id) | |
| parent_file = UploadedFile.query.filter_by( | |
| id=parent_file_id, | |
| uploaded_by=current_user.id | |
| ).first() | |
| if not parent_file: | |
| error_msg = 'The original file could not be found.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'parent_file_check'}), 404 | |
| # Make sure it belongs to the same model | |
| if parent_file.model_name != model_name: | |
| error_msg = 'You can only continue an upload for files of the same model.' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'model_mismatch'}), 400 | |
| log_print(f"[Continued upload] Original file: {parent_file.original_filename} (ID: {parent_file_id})") | |
| except (ValueError, TypeError): | |
| log_print(f"[Warning] Invalid parent_file_id: {parent_file_id}") # log the bad value before discarding it | |
| parent_file_id = None | |
| log_print(f"[3/8] Upload attempt: {file.filename}, model: {model_name}") | |
| if not allowed_file(file.filename): | |
| error_msg = f'File type not allowed. Allowed types: {", ".join(ALLOWED_EXTENSIONS)}' | |
| log_print(f"[ERROR] {error_msg}") | |
| return jsonify({'error': error_msg, 'step': 'file_type_check'}), 400 | |
| log_print(f"[4/8] νμΌ νμ νμΈ μλ£: {file.filename}") | |
| # νμΌ ν¬κΈ° νμΈ (Content-Length ν€λ μ¬μ©) | |
| file_size = 0 | |
| try: | |
| # Check the Content-Length header | |
| if request.content_length: | |
| file_size = request.content_length | |
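| # Note: Content-Length covers the whole multipart body (boundaries and form fields included), so this is only an upper-bound estimate used for the coarse pre-check | |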
| print(f"Content-Lengthλ‘ νμΌ ν¬κΈ° νμΈ: {file_size} bytes") | |
| else: | |
| # Content-Lengthκ° μμΌλ©΄ νμΌ μ€νΈλ¦Όμμ ν¬κΈ° νμΈ μλ | |
| try: | |
| # νμΌ μ€νΈλ¦Όμ νμ¬ μμΉ μ μ₯ | |
| current_pos = file.tell() | |
| # νμΌ λμΌλ‘ μ΄λ | |
| file.seek(0, os.SEEK_END) | |
| file_size = file.tell() | |
| # μλ μμΉλ‘ 볡μ | |
| file.seek(current_pos, os.SEEK_SET) | |
| print(f"νμΌ μ€νΈλ¦ΌμΌλ‘ ν¬κΈ° νμΈ: {file_size} bytes") | |
| except (AttributeError, IOError, OSError) as e: | |
| print(f"νμΌ ν¬κΈ° νμΈ μ€ν¨ (μ μ₯ ν νμΈ): {str(e)}") | |
| file_size = 0 # μ μ₯ ν νμΈνλλ‘ 0μΌλ‘ μ€μ | |
| except Exception as e: | |
| print(f"νμΌ ν¬κΈ° νμΈ μ€λ₯: {str(e)}") | |
| file_size = 0 # μ μ₯ ν νμΈνλλ‘ 0μΌλ‘ μ€μ | |
| # νμΌ ν¬κΈ° μ¬μ μ²΄ν¬ (κ°λ₯ν κ²½μ°μλ§) | |
| if file_size > 0: | |
| if file_size > 100 * 1024 * 1024: # 100MB | |
| print(f"νμΌ ν¬κΈ° μ΄κ³Ό: {file_size} bytes") | |
| return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.'}), 400 | |
| if file_size == 0: | |
| print("λΉ νμΌ μ λ‘λ μλ") | |
| return jsonify({'error': 'λΉ νμΌμ μ λ‘λν μ μμ΅λλ€.'}), 400 | |
| # Generate a safe filename | |
| original_filename = file.filename | |
| filename = secure_filename(original_filename) | |
| if not filename: | |
| return jsonify({'error': 'Invalid filename.'}), 400 | |
| unique_filename = f"{uuid.uuid4().hex}_{filename}" | |
| file_path = os.path.join(UPLOAD_FOLDER, unique_filename) | |
| # Save the file to disk | |
| try: | |
| log_print(f"[6/8] Saving file: {file_path}") | |
| file.save(file_path) | |
| log_print(f"[6/8] File saved: {file_path}") | |
| except IOError as e: | |
| error_msg = f'An error occurred while saving the file: {str(e)}' | |
| log_print(f"[ERROR] File save IOError: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save'}), 500 | |
| except PermissionError as e: | |
| error_msg = f'Permission error while saving the file: {str(e)}' | |
| log_print(f"[ERROR] File save PermissionError: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save_permission'}), 500 | |
| except Exception as e: | |
| error_msg = f'File save failed: {str(e)}' | |
| log_print(f"[ERROR] File save Exception: {error_msg}") | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg, 'step': 'file_save'}), 500 | |
| # Verify the saved file | |
| if not os.path.exists(file_path): | |
| error_msg = 'The file was not saved.' | |
| print(f"File existence check failed: {file_path}") | |
| return jsonify({'error': error_msg}), 500 | |
| saved_file_size = os.path.getsize(file_path) | |
| if saved_file_size == 0: | |
| os.remove(file_path) # remove the empty file | |
| error_msg = 'The file was not saved correctly.' | |
| print(f"Removed empty file: {file_path}") | |
| return jsonify({'error': error_msg}), 500 | |
| print(f"Saved file size: {saved_file_size} bytes") | |
| # Save to the database | |
| try: | |
| log_print(f"[7/8] Saving to database: {original_filename}") | |
| uploaded_file = UploadedFile( | |
| filename=unique_filename, | |
| original_filename=original_filename, | |
| file_path=file_path, | |
| file_size=saved_file_size, | |
| model_name=model_name, # already validated above | |
| is_public=False, # default: not public | |
| uploaded_by=current_user.id, | |
| parent_file_id=parent_file_id if parent_file else None # set when continuing an upload | |
| ) | |
| db.session.add(uploaded_file) | |
| db.session.flush() # flush to obtain the ID | |
| log_print(f"[7/8] Database flush complete, file ID: {uploaded_file.id}") | |
| # Only the file itself is saved here (chunk creation is handled by separate APIs) | |
| db.session.commit() | |
| log_print(f"[8/8] Database commit complete: {original_filename}") | |
| log_print(f"[8/8] Linked model: {model_name}") | |
| log_print(f"{'='*60}") | |
| log_print(f"=== File upload complete (awaiting processing) ===") | |
| log_print(f"{'='*60}\n") | |
| log_print(f"[8/8] Upload complete - file: {original_filename}, model: {model_name}, size: {saved_file_size} bytes") | |
| log_print(f"[8/8] Next steps: Parent Chunk creation, chunk creation, episode analysis, and Graph Extraction run as separate steps.") | |
| # Count episodes (based on section splitting) - requires reading the file | |
| episode_count = 0 | |
| if original_filename.lower().endswith(('.txt', '.md')): | |
| try: | |
| encoding = 'utf-8' | |
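| # Try UTF-8 first and fall back to CP949 (the legacy Korean Windows encoding) if decoding fails | |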
| try: | |
| with open(file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| sections = split_content_by_episodes(content) | |
| # Episode count excluding the '작품설명' (work description) section | |
| episode_sections = [s for s in sections if s[0] != '작품설명'] | |
| episode_count = len(episode_sections) | |
| log_print(f"[8/8] Episode count: {episode_count} episodes") | |
| except Exception as e: | |
| log_print(f"[8/8] Episode count error: {str(e)}") | |
| episode_count = 0 | |
| return jsonify({ | |
| 'message': f'The file was uploaded successfully. (model: {model_name})', | |
| 'file': uploaded_file.to_dict(), | |
| 'model_name': model_name, | |
| 'file_id': uploaded_file.id, | |
| 'episode_count': episode_count, # number of episodes | |
| 'needs_processing': original_filename.lower().endswith(('.txt', '.md')) # whether further processing is needed | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| error_msg = f'An error occurred while saving to the database: {str(e)}' | |
| log_print(f"[ERROR] Database save error: {error_msg}") | |
| traceback.print_exc() | |
| # If the database save failed, remove the file as well | |
| if 'file_path' in locals() and os.path.exists(file_path): | |
| try: | |
| os.remove(file_path) | |
| log_print(f"Removed file after error: {file_path}") | |
| except Exception as del_e: | |
| log_print(f"File removal failed: {str(del_e)}") | |
| return jsonify({'error': error_msg, 'step': 'database_save'}), 500 | |
| except Exception as e: | |
| db.session.rollback() | |
| error_msg = str(e) | |
| error_type = type(e).__name__ | |
| log_print(f"\n{'='*60}") | |
| log_print(f"=== μ λ‘λ μ²λ¦¬ μ€ μμΈ λ°μ ===") | |
| log_print(f"μμΈ νμ : {error_type}") | |
| log_print(f"μλ¬ λ©μμ§: {error_msg}") | |
| traceback.print_exc() | |
| log_print(f"{'='*60}\n") | |
| # νμΌ ν¬κΈ° μ΄κ³Ό μ€λ₯ μ²λ¦¬ | |
| if '413' in error_msg or 'Request Entity Too Large' in error_msg or error_type == 'RequestEntityTooLarge': | |
| return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.', 'step': 'file_size'}), 413 | |
| return jsonify({'error': f'νμΌ μ λ‘λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_type}: {error_msg}', 'step': 'exception'}), 500 | |
| def get_files(): | |
| """μ λ‘λλ νμΌ λͺ©λ‘ μ‘°ν""" | |
| try: | |
| model_name = request.args.get('model_name', None) | |
| public_only = request.args.get('public_only', 'false').lower() == 'true' # κ³΅κ° νμΌλ§ μ‘°ν μ΅μ | |
| # μλ³Έ νμΌλ§ μ‘°ν (parent_file_idκ° NoneμΈ νμΌ) | |
| # κ΄λ¦¬μκ° μλ κ²½μ° κ³΅κ° νμΌλ§ μ‘°ν, κ΄λ¦¬μλ λͺ¨λ νμΌ μ‘°ν κ°λ₯ | |
| if public_only or (not current_user.is_admin): | |
| query = UploadedFile.query.filter_by(parent_file_id=None, is_public=True) | |
| print(f"[νμΌ μ‘°ν] κ³΅κ° νμΌλ§ μ‘°ν (μ¬μ©μ: {current_user.username}, κ΄λ¦¬μ: {current_user.is_admin})") | |
| else: | |
| query = UploadedFile.query.filter_by(parent_file_id=None) | |
| print(f"[νμΌ μ‘°ν] λͺ¨λ νμΌ μ‘°ν (μ¬μ©μ: {current_user.username}, κ΄λ¦¬μ: {current_user.is_admin})") | |
| # λͺ¨λΈ νν°λ§ μ μ 체 νμΌ μ νμΈ | |
| total_before_filter = query.count() | |
| print(f"[νμΌ μ‘°ν] νν°λ§ μ νμΌ μ: {total_before_filter}κ°") | |
| if model_name: | |
| query = query.filter_by(model_name=model_name) | |
| print(f"[νμΌ μ‘°ν] λͺ¨λΈ '{model_name}' νν°λ§") | |
| files = query.order_by(UploadedFile.uploaded_at.desc()).all() | |
| # νν°λ§ ν νμΌ μμ λͺ¨λΈλͺ νμΈ | |
| print(f"[νμΌ μ‘°ν] νν°λ§ ν νμΌ μ: {len(files)}κ°") | |
| if len(files) > 0: | |
| print(f"[νμΌ μ‘°ν] 첫 λ²μ§Έ νμΌ λͺ¨λΈλͺ : {files[0].model_name}") | |
| else: | |
| # νν°λ§ κ²°κ³Όκ° μμ λ μ€μ μ‘΄μ¬νλ λͺ¨λΈλͺ νμΈ | |
| all_files = UploadedFile.query.filter_by(parent_file_id=None).all() | |
| unique_models = set(f.model_name for f in all_files if f.model_name) | |
| print(f"[νμΌ μ‘°ν] λ°μ΄ν°λ² μ΄μ€μ μ‘΄μ¬νλ λͺ¨λΈλͺ λͺ©λ‘: {list(unique_models)}") | |
| # κ° μλ³Έ νμΌμ λν΄ μ΄μ΄μ μ λ‘λλ νμΌλ ν¬ν¨ | |
| files_with_children = [] | |
| for file in files: | |
| file_dict = file.to_dict() | |
| # μ²ν¬ κ°μ μΆκ° | |
| chunk_count = DocumentChunk.query.filter_by(file_id=file.id).count() | |
| file_dict['chunk_count'] = chunk_count | |
| # Parent Chunk μ‘΄μ¬ μ¬λΆ νμΈ | |
| has_parent_chunk = ParentChunk.query.filter_by(file_id=file.id).first() is not None | |
| file_dict['has_parent_chunk'] = has_parent_chunk | |
| # μ΄μ΄μ μ λ‘λλ νμΌλ€λ μ‘°ν | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file.id).order_by(UploadedFile.uploaded_at.asc()).all() | |
| child_files_dict = [] | |
| for child in child_files: | |
| child_dict = child.to_dict() | |
| child_chunk_count = DocumentChunk.query.filter_by(file_id=child.id).count() | |
| child_dict['chunk_count'] = child_chunk_count | |
| # Child νμΌλ Parent Chunk νμΈ | |
| child_has_parent_chunk = ParentChunk.query.filter_by(file_id=child.id).first() is not None | |
| child_dict['has_parent_chunk'] = child_has_parent_chunk | |
| child_files_dict.append(child_dict) | |
| file_dict['child_files'] = child_files_dict | |
| files_with_children.append(file_dict) | |
| # λͺ¨λΈλ³ ν΅κ³ μ 보 μΆκ° (μλ³Έ νμΌλ§ μΉ΄μ΄νΈ, κ³΅κ° νμΌλ§) | |
| model_stats = {} | |
| if not model_name: | |
| # λͺ¨λ λͺ¨λΈμ ν΅κ³ (μλ³Έ νμΌλ§, κ³΅κ° νμΌλ§) | |
| if public_only or (not current_user.is_admin): | |
| all_files = UploadedFile.query.filter_by(parent_file_id=None, is_public=True).all() | |
| else: | |
| all_files = UploadedFile.query.filter_by(parent_file_id=None).all() | |
| for file in all_files: | |
| model = file.model_name or '미지정' | |
| if model not in model_stats: | |
| model_stats[model] = {'count': 0, 'total_size': 0} | |
| model_stats[model]['count'] += 1 | |
| model_stats[model]['total_size'] += file.file_size | |
| else: | |
| # νΉμ λͺ¨λΈμ ν΅κ³ | |
| model_stats[model_name] = { | |
| 'count': len(files), | |
| 'total_size': sum(f.file_size for f in files) | |
| } | |
| print(f"[νμΌ μ‘°ν] μ‘°νλ μλ³Έ νμΌ μ: {len(files)}κ°") | |
| return jsonify({ | |
| 'files': files_with_children, | |
| 'model_stats': model_stats, | |
| 'filtered_model': model_name | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_chunks(file_id): | |
| """νμΌμ μ²ν¬ μ 보 μ‘°ν (νμ΅ μν νμΈμ©)""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all() | |
| total_chunks = len(chunks) | |
| # μν μ²ν¬ (μ²μ 3κ°) | |
| sample_chunks = [] | |
| for chunk in chunks[:3]: | |
| sample_chunks.append({ | |
| 'index': chunk.chunk_index, | |
| 'content_preview': chunk.content[:100] + '...' if len(chunk.content) > 100 else chunk.content, | |
| 'content_length': len(chunk.content) | |
| }) | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'model_name': file.model_name, | |
| 'total_chunks': total_chunks, | |
| 'sample_chunks': sample_chunks, | |
| 'learning_status': 'ready' if total_chunks > 0 else 'not_ready', | |
| 'message': f'{total_chunks}κ° μ²ν¬κ° μ μ₯λμ΄ RAG κ²μμ μ¬μ© κ°λ₯ν©λλ€.' if total_chunks > 0 else 'μ²ν¬κ° μμ±λμ§ μμ RAG κ²μμ΄ λΆκ°λ₯ν©λλ€.' | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'μ²ν¬ μ 보 μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_all_file_chunks(file_id): | |
| """νμΌμ λͺ¨λ μ²ν¬ λͺ©λ‘κ³Ό λ΄μ© μ‘°ν (κ΄λ¦¬μμ©)""" | |
| try: | |
| # κ΄λ¦¬μλ λͺ¨λ νμΌ μ‘°ν κ°λ₯ | |
| if current_user.is_admin: | |
| file = UploadedFile.query.get(file_id) | |
| else: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all() | |
| chunks_data = [] | |
| for chunk in chunks: | |
| chunk_dict = { | |
| 'id': chunk.id, | |
| 'chunk_index': chunk.chunk_index, | |
| 'content': chunk.content, | |
| 'content_length': len(chunk.content), | |
| 'created_at': chunk.created_at.isoformat() if chunk.created_at else None | |
| } | |
| # λ©νλ°μ΄ν° νμ± | |
| if chunk.chunk_metadata: | |
| try: | |
| metadata = json.loads(chunk.chunk_metadata) | |
| chunk_dict['metadata'] = metadata | |
| except (json.JSONDecodeError, TypeError): | |
| chunk_dict['metadata'] = None | |
| else: | |
| chunk_dict['metadata'] = None | |
| chunks_data.append(chunk_dict) | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'model_name': file.model_name, | |
| 'total_chunks': len(chunks_data), | |
| 'chunks': chunks_data | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'μ²ν¬ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_summary(file_id): | |
| """νμΌμ μμ½ λ΄μ© μ‘°ν (Parent Chunk + Episode Analysis)""" | |
| try: | |
| print(f"[μμ½ μ‘°ν] νμΌ ID {file_id} μμ½ λ΄μ© μ‘°ν μμ² (μ¬μ©μ: {current_user.username})") | |
| # λͺ¨λ μ¬μ©μκ° λͺ¨λ νμΌ μ‘°ν κ°λ₯ (κ΄λ¦¬μ νμ΄μ§μ λμΌ) | |
| file = UploadedFile.query.get(file_id) | |
| if not file: | |
| print(f"[μμ½ μ‘°ν] νμΌμ μ°Ύμ μ μμ: νμΌ ID {file_id}") | |
| # λλ²κΉ : μ 체 νμΌ λͺ©λ‘ νμΈ | |
| all_files = UploadedFile.query.all() | |
| print(f"[μμ½ μ‘°ν] λ°μ΄ν°λ² μ΄μ€μ μ‘΄μ¬νλ νμΌ ID λͺ©λ‘: {[f.id for f in all_files]}") | |
| return jsonify({'error': f'νμΌμ μ°Ύμ μ μμ΅λλ€. (νμΌ ID: {file_id})'}), 404 | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| episode_analysis = EpisodeAnalysis.query.filter_by(file_id=file_id).first() | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'parent_chunk': parent_chunk.to_dict() if parent_chunk else None, | |
| 'episode_analysis': episode_analysis.to_dict() if episode_analysis else None, | |
| 'has_parent_chunk': parent_chunk is not None, | |
| 'has_episode_analysis': episode_analysis is not None | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'μμ½ λ΄μ© μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_graph(file_id): | |
| """νμΌμ GraphRAG λ°μ΄ν° μ‘°ν (μν°ν°, κ΄κ³, μ¬κ±΄)""" | |
| try: | |
| print(f"[GraphRAG μ‘°ν] νμΌ ID {file_id} GraphRAG λ°μ΄ν° μ‘°ν μμ² (μ¬μ©μ: {current_user.username})") | |
| file = UploadedFile.query.get(file_id) | |
| if not file: | |
| print(f"[GraphRAG μ‘°ν] νμΌμ μ°Ύμ μ μμ: νμΌ ID {file_id}") | |
| return jsonify({'error': f'νμΌμ μ°Ύμ μ μμ΅λλ€. (νμΌ ID: {file_id})'}), 404 | |
| # μν°ν° μ‘°ν (νμ°¨λ³λ‘ κ·Έλ£Ήν) | |
| entities = GraphEntity.query.filter_by(file_id=file_id).all() | |
| entities_by_episode = {} | |
| for entity in entities: | |
| episode = entity.episode_title | |
| if episode not in entities_by_episode: | |
| entities_by_episode[episode] = {'characters': [], 'locations': []} | |
| if entity.entity_type == 'character': | |
| entities_by_episode[episode]['characters'].append(entity.to_dict()) | |
| elif entity.entity_type == 'location': | |
| entities_by_episode[episode]['locations'].append(entity.to_dict()) | |
| # κ΄κ³ μ‘°ν (νμ°¨λ³λ‘ κ·Έλ£Ήν) | |
| relationships = GraphRelationship.query.filter_by(file_id=file_id).all() | |
| relationships_by_episode = {} | |
| for rel in relationships: | |
| episode = rel.episode_title | |
| if episode not in relationships_by_episode: | |
| relationships_by_episode[episode] = [] | |
| relationships_by_episode[episode].append(rel.to_dict()) | |
| # μ¬κ±΄ μ‘°ν (νμ°¨λ³λ‘ κ·Έλ£Ήν) | |
| events = GraphEvent.query.filter_by(file_id=file_id).all() | |
| events_by_episode = {} | |
| for event in events: | |
| episode = event.episode_title | |
| if episode not in events_by_episode: | |
| events_by_episode[episode] = [] | |
| events_by_episode[episode].append(event.to_dict()) | |
| # ν΅κ³ μ 보 | |
| total_entities = len(entities) | |
| total_relationships = len(relationships) | |
| total_events = len(events) | |
| episodes = list(set([e.episode_title for e in entities] + | |
| [r.episode_title for r in relationships] + | |
| [ev.episode_title for ev in events])) | |
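| # Deduplicated episode titles collected from the entities, relationships, and events combined | |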
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'statistics': { | |
| 'total_entities': total_entities, | |
| 'total_relationships': total_relationships, | |
| 'total_events': total_events, | |
| 'episodes_count': len(episodes) | |
| }, | |
| 'entities_by_episode': entities_by_episode, | |
| 'relationships_by_episode': relationships_by_episode, | |
| 'events_by_episode': events_by_episode, | |
| 'episodes': sorted(episodes) | |
| }), 200 | |
| except Exception as e: | |
| print(f"[GraphRAG μ‘°ν] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'GraphRAG λ°μ΄ν° μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_parent_chunk(file_id): | |
| """νμΌμ Parent Chunk μ‘°ν""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| if not parent_chunk: | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'has_parent_chunk': False, | |
| 'message': 'Parent Chunkκ° μμ±λμ§ μμμ΅λλ€.' | |
| }), 200 | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'has_parent_chunk': True, | |
| 'parent_chunk': parent_chunk.to_dict(), | |
| 'message': 'Parent Chunkκ° μ‘΄μ¬ν©λλ€.' | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'Parent Chunk μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def create_file_parent_chunk(file_id): | |
| """νμΌμ Parent Chunk μλ μμ± (μ¬μμ±)""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| # λͺ¨λΈλͺ νμΈ | |
| if not file.model_name: | |
| return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€. Parent Chunkλ₯Ό μμ±ν μ μμ΅λλ€.'}), 400 | |
| # νμΌμ΄ ν μ€νΈ νμΌμΈμ§ νμΈ | |
| if not file.original_filename.lower().endswith(('.txt', '.md')): | |
| return jsonify({'error': 'Parent Chunkλ ν μ€νΈ νμΌ(.txt, .md)μλ§ μμ±ν μ μμ΅λλ€.'}), 400 | |
| # νμΌ κ²½λ‘ νμΈ | |
| if not file.file_path or not os.path.exists(file.file_path): | |
| error_msg = f'νμΌ κ²½λ‘κ° μ ν¨νμ§ μμ΅λλ€: {file.file_path}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| return jsonify({'error': error_msg}), 500 | |
| # νμΌ λ΄μ© μ½κΈ° | |
| try: | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| except FileNotFoundError: | |
| error_msg = f'νμΌμ μ°Ύμ μ μμ΅λλ€: {file.file_path}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| return jsonify({'error': error_msg}), 500 | |
| except PermissionError: | |
| error_msg = f'νμΌ μ½κΈ° κΆνμ΄ μμ΅λλ€: {file.file_path}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| return jsonify({'error': error_msg}), 500 | |
| except Exception as e: | |
| error_msg = f'νμΌμ μ½μ μ μμ΅λλ€: {str(e)}' | |
| print(f"[Parent Chunk μμ±] β μ€λ₯: {error_msg}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': error_msg}), 500 | |
| if not content or len(content.strip()) == 0: | |
| return jsonify({'error': 'νμΌ λ΄μ©μ΄ λΉμ΄μμ΅λλ€.'}), 400 | |
| # Parent Chunk μμ± | |
| print(f"[Parent Chunk μλ μμ±] νμΌ ID {file_id}μ λν Parent Chunk μμ± μμ") | |
| print(f"[Parent Chunk μλ μμ±] λͺ¨λΈλͺ : {file.model_name}") | |
| print(f"[Parent Chunk μλ μμ±] νμΌλͺ : {file.original_filename}") | |
| parent_chunk = create_parent_chunk_with_ai(file_id, content, file.model_name) | |
| if parent_chunk: | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'has_parent_chunk': True, | |
| 'parent_chunk': parent_chunk.to_dict(), | |
| 'message': 'Parent Chunkκ° μ±κ³΅μ μΌλ‘ μμ±λμμ΅λλ€.' | |
| }), 200 | |
| else: | |
| return jsonify({ | |
| 'error': 'Parent Chunk μμ±μ μ€ν¨νμ΅λλ€. μλ² λ‘κ·Έλ₯Ό νμΈνμΈμ.', | |
| 'file_id': file_id, | |
| 'filename': file.original_filename | |
| }), 500 | |
| except Exception as e: | |
| import traceback | |
| error_traceback = traceback.format_exc() | |
| error_msg = str(e) | |
| print(f"[Parent Chunk μμ±] β μμΈ λ°μ: {error_msg}") | |
| print(f"[Parent Chunk μμ±] Traceback:\n{error_traceback}") | |
| return jsonify({ | |
| 'error': f'Parent Chunk μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_msg}', | |
| 'file_id': file_id | |
| }), 500 | |
| def process_parent_chunk(file_id): | |
| """λ¨κ³ 1: Parent Chunk μμ±""" | |
| return create_file_parent_chunk(file_id) | |
| def process_chunks(file_id): | |
| """λ¨κ³ 2: Chunk μμ± (νμ°¨ λΆμ, Graph Extraction μ μΈ)""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| if not file.original_filename.lower().endswith(('.txt', '.md')): | |
| return jsonify({'error': 'Chunkλ ν μ€νΈ νμΌ(.txt, .md)μλ§ μμ±ν μ μμ΅λλ€.'}), 400 | |
| # νμΌ λ΄μ© μ½κΈ° | |
| try: | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌμ μ½μ μ μμ΅λλ€: {str(e)}'}), 500 | |
| print(f"[λ¨κ³ 2: Chunk μμ±] νμΌ ID {file_id}μ λν Chunk μμ± μμ") | |
| chunk_count = create_chunks_for_file(file_id, content, skip_episode_analysis=True, skip_graph_extraction=True) | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'chunk_count': chunk_count, | |
| 'message': f'Chunk {chunk_count}κ°κ° μ±κ³΅μ μΌλ‘ μμ±λμμ΅λλ€.', | |
| 'step': 'chunks', | |
| 'completed': True | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'Chunk μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'step': 'chunks'}), 500 | |
| def process_episode_analysis(file_id): | |
| """λ¨κ³ 3: νμ°¨ λΆμ""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| if not file.model_name: | |
| return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€.'}), 400 | |
| if not file.original_filename.lower().endswith(('.txt', '.md')): | |
| return jsonify({'error': 'νμ°¨ λΆμμ ν μ€νΈ νμΌ(.txt, .md)μλ§ κ°λ₯ν©λλ€.'}), 400 | |
| # νμΌ λ΄μ© μ½κΈ° | |
| try: | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌμ μ½μ μ μμ΅λλ€: {str(e)}'}), 500 | |
| # μΉμ λΆν | |
| sections = split_content_by_episodes(content) | |
| episode_sections = [s for s in sections if s[0] != '작품설명'] | |
| if not episode_sections: | |
| return jsonify({'error': 'λΆμν νμ°¨κ° μμ΅λλ€.'}), 400 | |
| # Parent Chunk κ°μ Έμ€κΈ° | |
| parent_chunk = None | |
| try: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| except Exception: | |
| pass | |
| # κΈ°μ‘΄ νμ°¨ λΆμ μμ | |
| existing_analyses = EpisodeAnalysis.query.filter_by(file_id=file_id).all() | |
| if existing_analyses: | |
| for analysis in existing_analyses: | |
| db.session.delete(analysis) | |
| db.session.commit() | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] νμΌ ID {file_id}μ λν νμ°¨ λΆμ μμ ({len(episode_sections)}κ° νμ°¨)") | |
| # κ° νμ°¨ λΆμ | |
| all_analyses = [] | |
| for section_type, section_title, section_content, section_metadata in episode_sections: | |
| try: | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' λΆμ μ€...") | |
| analysis_result = analyze_episode( | |
| episode_content=section_content, | |
| episode_title=section_title, | |
| full_content=content, | |
| parent_chunk=parent_chunk, | |
| model_name=file.model_name | |
| ) | |
| if analysis_result: | |
| all_analyses.append(f"\n\n{analysis_result}") | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' λΆμ μλ£") | |
| except Exception as e: | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' λΆμ μ€ μ€λ₯: {str(e)}") | |
| continue | |
| # λͺ¨λ νμ°¨ λΆμ κ²°κ³Όλ₯Ό νλμ ν μ€νΈλ‘ μ μ₯ | |
| if all_analyses: | |
| combined_analysis = "\n".join(all_analyses).strip() | |
| episode_analysis = EpisodeAnalysis( | |
| file_id=file_id, | |
| episode_title="전체 회차 통합 분석", | |
| analysis_content=combined_analysis | |
| ) | |
| db.session.add(episode_analysis) | |
| db.session.commit() | |
| # νμ°¨ λΆμ μ±κ³΅ ν Graph Extraction μλ μ€ν | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] Graph Extraction μλ μ€ν μμ...") | |
| graph_success_count = 0 | |
| for section_type, section_title, section_content, section_metadata in episode_sections: | |
| try: | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' Graph Extraction μ€...") | |
| success = extract_graph_from_episode( | |
| episode_content=section_content, | |
| episode_title=section_title, | |
| file_id=file_id, | |
| full_content=content, | |
| parent_chunk=parent_chunk, | |
| model_name=file.model_name | |
| ) | |
| if success: | |
| graph_success_count += 1 | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' Graph Extraction μλ£") | |
| except Exception as e: | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] '{section_title}' Graph Extraction μ€ μ€λ₯: {str(e)}") | |
| continue | |
| print(f"[λ¨κ³ 3: νμ°¨ λΆμ] Graph Extraction μλ£: {graph_success_count}/{len(episode_sections)}κ° νμ°¨ μ±κ³΅") | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'episode_count': len(episode_sections), | |
| 'graph_success_count': graph_success_count, | |
| 'message': f'{len(episode_sections)}κ° νμ°¨ λΆμμ΄ μλ£λμμ΅λλ€. (Graph Extraction: {graph_success_count}/{len(episode_sections)}κ° μ±κ³΅)', | |
| 'step': 'episode-analysis', | |
| 'completed': True | |
| }), 200 | |
| else: | |
| return jsonify({ | |
| 'error': 'νμ°¨ λΆμ κ²°κ³Όκ° μμ΅λλ€.', | |
| 'step': 'episode-analysis', | |
| 'completed': False | |
| }), 500 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'νμ°¨ λΆμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'step': 'episode-analysis'}), 500 | |
| def process_graph(file_id): | |
| """λ¨κ³ 4: Graph Extraction""" | |
| try: | |
| file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first() | |
| if not file: | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| if not file.model_name: | |
| return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€.'}), 400 | |
| if not file.original_filename.lower().endswith(('.txt', '.md')): | |
| return jsonify({'error': 'Graph Extractionμ ν μ€νΈ νμΌ(.txt, .md)μλ§ κ°λ₯ν©λλ€.'}), 400 | |
| # νμΌ λ΄μ© μ½κΈ° | |
| try: | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌμ μ½μ μ μμ΅λλ€: {str(e)}'}), 500 | |
| # μΉμ λΆν | |
| sections = split_content_by_episodes(content) | |
| episode_sections = [s for s in sections if s[0] != '작품설명'] | |
| if not episode_sections: | |
| return jsonify({'error': 'Graph Extractionν νμ°¨κ° μμ΅λλ€.'}), 400 | |
| # Parent Chunk κ°μ Έμ€κΈ° | |
| parent_chunk = None | |
| try: | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first() | |
| except Exception: | |
| pass | |
| print(f"[λ¨κ³ 4: Graph Extraction] νμΌ ID {file_id}μ λν Graph Extraction μμ ({len(episode_sections)}κ° νμ°¨)") | |
| # κ° νμ°¨ Graph Extraction | |
| success_count = 0 | |
| for section_type, section_title, section_content, section_metadata in episode_sections: | |
| try: | |
| print(f"[λ¨κ³ 4: Graph Extraction] '{section_title}' Graph Extraction μ€...") | |
| success = extract_graph_from_episode( | |
| episode_content=section_content, | |
| episode_title=section_title, | |
| file_id=file_id, | |
| full_content=content, | |
| parent_chunk=parent_chunk, | |
| model_name=file.model_name | |
| ) | |
| if success: | |
| success_count += 1 | |
| print(f"[λ¨κ³ 4: Graph Extraction] '{section_title}' Graph Extraction μλ£") | |
| except Exception as e: | |
| print(f"[λ¨κ³ 4: Graph Extraction] '{section_title}' Graph Extraction μ€ μ€λ₯: {str(e)}") | |
| continue | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'episode_count': len(episode_sections), | |
| 'success_count': success_count, | |
| 'message': f'{success_count}/{len(episode_sections)}κ° νμ°¨ Graph Extractionμ΄ μλ£λμμ΅λλ€.', | |
| 'step': 'graph', | |
| 'completed': True | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'Graph Extraction μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'step': 'graph'}), 500 | |
| def create_file_metadata(file_id): | |
| """νμΌμ λͺ¨λ μ²ν¬μ λ©νλ°μ΄ν° μμ± (μλ μμ±)""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| # κΆν νμΈ | |
| if not current_user.is_admin and file.uploaded_by != current_user.id: | |
| return jsonify({'error': 'κΆνμ΄ μμ΅λλ€.'}), 403 | |
| # λͺ¨λΈλͺ νμΈ | |
| if not file.model_name: | |
| return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€. λ©νλ°μ΄ν°λ₯Ό μμ±ν μ μμ΅λλ€.'}), 400 | |
| # ν μ€νΈ νμΌλ§ κ°λ₯ | |
| if not file.original_filename.lower().endswith(('.txt', '.md')): | |
| return jsonify({'error': 'λ©νλ°μ΄ν°λ ν μ€νΈ νμΌ(.txt, .md)μλ§ μμ±ν μ μμ΅λλ€.'}), 400 | |
| # νμΌ λ΄μ© μ½κΈ° | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| # λͺ¨λ μ²ν¬ κ°μ Έμ€κΈ° | |
| chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index).all() | |
| if not chunks: | |
| return jsonify({'error': 'μ²ν¬κ° μμ΅λλ€. λ¨Όμ νμΌμ μ λ‘λνμΈμ.'}), 400 | |
| print(f"[λ©νλ°μ΄ν° μμ±] νμΌ ID {file_id}μ λν λ©νλ°μ΄ν° μμ± μμ") | |
| print(f"[λ©νλ°μ΄ν° μμ±] λͺ¨λΈλͺ : {file.model_name}") | |
| print(f"[λ©νλ°μ΄ν° μμ±] νμΌλͺ : {file.original_filename}") | |
| print(f"[λ©νλ°μ΄ν° μμ±] μ²ν¬ κ°μ: {len(chunks)}κ°") | |
| # κ° μ²ν¬μ λ©νλ°μ΄ν° μμ± | |
| success_count = 0 | |
| fail_count = 0 | |
| for chunk in chunks: | |
| try: | |
| # κΈ°μ‘΄ λ©νλ°μ΄ν° μ½κΈ° | |
| existing_metadata = {} | |
| if chunk.chunk_metadata: | |
| try: | |
| existing_metadata = json.loads(chunk.chunk_metadata) | |
| except (json.JSONDecodeError, TypeError): | |
| existing_metadata = {} | |
| # μ λ©νλ°μ΄ν° μΆμΆ | |
| new_metadata = extract_chunk_metadata( | |
| chunk_content=chunk.content, | |
| full_content=content, # μλ³Έ μΉμμ€ μ 체 λ΄μ© μ°Έμ‘° | |
| chunk_index=chunk.chunk_index, | |
| file_id=file_id, | |
| model_name=file.model_name | |
| ) | |
| # κΈ°μ‘΄ λ©νλ°μ΄ν°μ μ λ©νλ°μ΄ν° λ³ν© (μ λ©νλ°μ΄ν°κ° μ°μ ) | |
| # κΈ°μ‘΄ λ©νλ°μ΄ν°μ λͺ¨λ νλλ₯Ό μ μ§νλ, μλ‘ μΆμΆν νλλ‘ μ λ°μ΄νΈ | |
| # chapter νλλ νμΌ μ λ‘λ μ μΆκ°λ νμ°¨ μ 보μ΄λ―λ‘ μ μ§ | |
| merged_metadata = existing_metadata.copy() | |
| for key, value in new_metadata.items(): | |
| if value is not None and value != []: | |
| # 리μ€νΈμΈ κ²½μ° μ€λ³΅ μ κ±° ν λ³ν© | |
| if isinstance(value, list) and isinstance(merged_metadata.get(key), list): | |
| merged_list = merged_metadata.get(key, []).copy() | |
| for item in value: | |
| if item not in merged_list: | |
| merged_list.append(item) | |
| merged_metadata[key] = merged_list | |
| else: | |
| merged_metadata[key] = value | |
| # λ©νλ°μ΄ν°λ₯Ό JSON λ¬Έμμ΄λ‘ λ³ν | |
| metadata_json = json.dumps(merged_metadata, ensure_ascii=False) if merged_metadata else None | |
| # μ²ν¬μ λ©νλ°μ΄ν° μ μ₯ | |
| chunk.chunk_metadata = metadata_json | |
| success_count += 1 | |
| # μ§ν μν© μΆλ ₯ (10κ°λ§λ€) | |
| if (success_count + fail_count) % 10 == 0: | |
| print(f"[λ©νλ°μ΄ν° μμ±] μ§ν μ€: {success_count + fail_count}/{len(chunks)}κ° μ²ν¬ μ²λ¦¬ μ€...") | |
| except Exception as e: | |
| print(f"[λ©νλ°μ΄ν° μμ±] κ²½κ³ : μ²ν¬ {chunk.chunk_index} λ©νλ°μ΄ν° μμ± μ€ν¨: {str(e)}") | |
| fail_count += 1 | |
| continue | |
| # λ°μ΄ν°λ² μ΄μ€ μ»€λ° | |
| db.session.commit() | |
| print(f"[λ©νλ°μ΄ν° μμ±] μλ£: {success_count}κ° μ±κ³΅, {fail_count}κ° μ€ν¨") | |
| return jsonify({ | |
| 'file_id': file_id, | |
| 'filename': file.original_filename, | |
| 'total_chunks': len(chunks), | |
| 'success_count': success_count, | |
| 'fail_count': fail_count, | |
| 'message': f'λ©νλ°μ΄ν° μμ±μ΄ μλ£λμμ΅λλ€. (μ±κ³΅: {success_count}κ°, μ€ν¨: {fail_count}κ°)' | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| print(f"[λ©νλ°μ΄ν° μμ±] μ€λ₯: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
| return jsonify({'error': f'λ©νλ°μ΄ν° μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def delete_file(file_id): | |
| """μ λ‘λλ νμΌ μμ (μ°κ΄λ λͺ¨λ νμΌλ ν¨κ» μμ )""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| # μλ³Έ νμΌμΈ κ²½μ° (parent_file_idκ° NoneμΈ κ²½μ°) | |
| # μ΄μ΄μ μ λ‘λλ λͺ¨λ νμΌλ ν¨κ» μμ | |
| files_to_delete = [] | |
| if file.parent_file_id is None: | |
| # μλ³Έ νμΌμ΄λ©΄, μ΄μ΄μ μ λ‘λλ λͺ¨λ νμΌλ μ°Ύμμ μμ | |
| child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all() | |
| files_to_delete = [file] + child_files | |
| print(f"[νμΌ μμ ] μλ³Έ νμΌ μμ : {file.original_filename}, μ°κ΄ νμΌ {len(child_files)}κ°λ ν¨κ» μμ ") | |
| else: | |
| # μ΄μ΄μ μ λ‘λλ νμΌμ΄λ©΄ μλ³Έ νμΌλ ν¨κ» μμ | |
| parent_file = UploadedFile.query.get(file.parent_file_id) | |
| if parent_file: | |
| # μλ³Έ νμΌκ³Ό λͺ¨λ μ°κ΄ νμΌ μμ | |
| all_child_files = UploadedFile.query.filter_by(parent_file_id=file.parent_file_id).all() | |
| files_to_delete = [parent_file] + all_child_files | |
| print(f"[νμΌ μμ ] μ΄μ΄μ μ λ‘λλ νμΌ μμ : {file.original_filename}, μλ³Έ λ° μ°κ΄ νμΌ {len(all_child_files)}κ°λ ν¨κ» μμ ") | |
| else: | |
| files_to_delete = [file] | |
| deleted_count = 0 | |
| deleted_files = [] | |
| for file_to_delete in files_to_delete: | |
| try: | |
| # νμΌ μμ€ν μμ μμ | |
| if os.path.exists(file_to_delete.file_path): | |
| os.remove(file_to_delete.file_path) | |
| print(f"[νμΌ μμ ] νμΌ μμ€ν μμ μμ : {file_to_delete.file_path}") | |
| # κ΄λ ¨ Child Chunk (DocumentChunk) μμ | |
| child_chunk_count = DocumentChunk.query.filter_by(file_id=file_to_delete.id).count() | |
| if child_chunk_count > 0: | |
| DocumentChunk.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] Child Chunk {child_chunk_count}κ° μμ μλ£") | |
| # λ²‘ν° DBμμλ ν΄λΉ νμΌμ μ²ν¬ μμ | |
| try: | |
| vector_db = get_vector_db() | |
| vector_db.delete_chunks_by_file_id(file_to_delete.id) | |
| print(f"[νμΌ μμ ] λ²‘ν° DBμμ μ²ν¬ μμ μλ£") | |
| except Exception as vector_e: | |
| print(f"[νμΌ μμ ] λ²‘ν° DB μμ μ€λ₯ (무μ): {str(vector_e)}") | |
| # κ΄λ ¨ Parent Chunk μμ | |
| parent_chunk = ParentChunk.query.filter_by(file_id=file_to_delete.id).first() | |
| if parent_chunk: | |
| db.session.delete(parent_chunk) | |
| print(f"[νμΌ μμ ] Parent Chunk μμ μλ£") | |
| # κ΄λ ¨ EpisodeAnalysis μμ | |
| episode_analysis_count = EpisodeAnalysis.query.filter_by(file_id=file_to_delete.id).count() | |
| if episode_analysis_count > 0: | |
| EpisodeAnalysis.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] EpisodeAnalysis {episode_analysis_count}κ° μμ μλ£") | |
| # κ΄λ ¨ GraphRAG λ°μ΄ν° μμ (GraphEntity, GraphRelationship, GraphEvent) | |
| graph_entity_count = GraphEntity.query.filter_by(file_id=file_to_delete.id).count() | |
| if graph_entity_count > 0: | |
| GraphEntity.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] GraphEntity {graph_entity_count}κ° μμ μλ£") | |
| graph_relationship_count = GraphRelationship.query.filter_by(file_id=file_to_delete.id).count() | |
| if graph_relationship_count > 0: | |
| GraphRelationship.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] GraphRelationship {graph_relationship_count}κ° μμ μλ£") | |
| graph_event_count = GraphEvent.query.filter_by(file_id=file_to_delete.id).count() | |
| if graph_event_count > 0: | |
| GraphEvent.query.filter_by(file_id=file_to_delete.id).delete() | |
| print(f"[νμΌ μμ ] GraphEvent {graph_event_count}κ° μμ μλ£") | |
| deleted_files.append(file_to_delete.original_filename) | |
| db.session.delete(file_to_delete) | |
| deleted_count += 1 | |
| print(f"[νμΌ μμ ] λ°μ΄ν°λ² μ΄μ€μμ νμΌ μμ μλ£: {file_to_delete.original_filename}") | |
| except Exception as e: | |
| print(f"[νμΌ μμ μ€λ₯] {file_to_delete.original_filename}: {str(e)}") | |
| import traceback | |
| traceback.print_exc() | |
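| # Failures for individual files are logged above and skipped; the surviving deletions are committed in one transaction below | |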
| db.session.commit() | |
| message = f'νμΌμ΄ μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.' | |
| if deleted_count > 1: | |
| message = f'νμΌ {deleted_count}κ°κ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€. (μλ³Έ λ° μ°κ΄ νμΌ ν¬ν¨)' | |
| return jsonify({ | |
| 'message': message, | |
| 'deleted_count': deleted_count, | |
| 'deleted_files': deleted_files | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'νμΌ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def toggle_file_public(file_id): | |
| """νμΌ κ³΅κ° μ¬λΆ λ³κ²½ (κ΄λ¦¬μλ§ κ°λ₯)""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| data = request.get_json() | |
| is_public = data.get('is_public', False) | |
| file.is_public = is_public | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': f'νμΌμ΄ {"곡κ°" if is_public else "λΉκ³΅κ°"}λ‘ μ€μ λμμ΅λλ€.', | |
| 'file': file.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'νμΌ κ³΅κ° μ¬λΆ λ³κ²½ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_file_content(file_id): | |
| """μ λ‘λλ νμΌ λ΄μ© μ‘°ν""" | |
| try: | |
| file = UploadedFile.query.get_or_404(file_id) | |
| if not os.path.exists(file.file_path): | |
| return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404 | |
| # ν μ€νΈ νμΌ μ½κΈ° | |
| encoding = 'utf-8' | |
| try: | |
| with open(file.file_path, 'r', encoding=encoding) as f: | |
| content = f.read() | |
| except UnicodeDecodeError: | |
| # UTF-8λ‘ μ½μ μ μμΌλ©΄ λ€λ₯Έ μΈμ½λ© μλ | |
| with open(file.file_path, 'r', encoding='cp949') as f: | |
| content = f.read() | |
| return jsonify({ | |
| 'content': content, | |
| 'filename': file.original_filename | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'νμΌ λ΄μ© μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_chat_sessions(): | |
| """μ¬μ©μμ λν μΈμ λͺ©λ‘ μ‘°ν (μ΅κ·Ό 20κ°λ§ νμ)""" | |
| try: | |
| sessions = ChatSession.query.filter_by(user_id=current_user.id)\ | |
| .order_by(ChatSession.updated_at.desc())\ | |
| .limit(20).all() | |
| return jsonify({ | |
| 'sessions': [session.to_dict() for session in sessions] | |
| }), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def create_chat_session(): | |
| """μ λν μΈμ μμ±""" | |
| try: | |
| data = request.json | |
| title = data.get('title', '새 대화') | |
| model_name = data.get('model_name', None) # backward compatibility | |
| analysis_model = data.get('analysis_model', None) | |
| answer_model = data.get('answer_model', None) | |
| session = ChatSession( | |
| user_id=current_user.id, | |
| title=title, | |
| model_name=model_name, # νμ νΈνμ± | |
| analysis_model=analysis_model, | |
| answer_model=answer_model | |
| ) | |
| db.session.add(session) | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λν μΈμ μ΄ μμ±λμμ΅λλ€.', | |
| 'session': session.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def get_chat_session(session_id): | |
| """λν μΈμ μμΈ μ‘°ν (λ©μμ§ ν¬ν¨)""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| session_dict = session.to_dict() | |
| session_dict['messages'] = [msg.to_dict() for msg in session.messages] | |
| return jsonify({'session': session_dict}), 200 | |
| except Exception as e: | |
| return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def update_chat_session(session_id): | |
| """λν μΈμ μμ (μ λͺ© λ±)""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| data = request.json | |
| if 'title' in data: | |
| session.title = data['title'] | |
| session.updated_at = datetime.utcnow() | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.', | |
| 'session': session.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def delete_chat_session(session_id): | |
| """λν μΈμ μμ """ | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| db.session.delete(session) | |
| db.session.commit() | |
| return jsonify({'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.'}), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |
| def add_chat_message(session_id): | |
| """λν λ©μμ§ μΆκ°""" | |
| try: | |
| session = ChatSession.query.filter_by( | |
| id=session_id, | |
| user_id=current_user.id | |
| ).first_or_404() | |
| data = request.json | |
| role = data.get('role', 'user') | |
| content = data.get('content', '') | |
| if not content: | |
| return jsonify({'error': 'λ©μμ§ λ΄μ©μ΄ νμν©λλ€.'}), 400 | |
| message = ChatMessage( | |
| session_id=session_id, | |
| role=role, | |
| content=content | |
| ) | |
| db.session.add(message) | |
| # μΈμ μ λͺ© μ λ°μ΄νΈ (첫 μ¬μ©μ λ©μμ§μΈ κ²½μ°) | |
| if not session.title or session.title == '새 대화': | |
| if role == 'user': | |
| title = content[:30] + '...' if len(content) > 30 else content | |
| session.title = title | |
| session.updated_at = datetime.utcnow() | |
| db.session.commit() | |
| return jsonify({ | |
| 'message': 'λ©μμ§κ° μΆκ°λμμ΅λλ€.', | |
| 'chat_message': message.to_dict() | |
| }), 200 | |
| except Exception as e: | |
| db.session.rollback() | |
| return jsonify({'error': f'λ©μμ§ μΆκ° μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500 | |