SOY NV AI
λ©νλ°μ΄ν° μμ± κΈ°λ₯ κ°μ : κΈ°μ‘΄ λ©νλ°μ΄ν° λ³ν© λ° νμ°¨ μ 보 μ μ§
d234e06
| from flask import Blueprint, render_template, request, jsonify, send_from_directory, redirect, url_for, flash | |
| from flask_login import login_user, logout_user, login_required, current_user | |
| from werkzeug.utils import secure_filename | |
| from app.database import db, UploadedFile, User, ChatSession, ChatMessage, DocumentChunk, ParentChunk, SystemConfig | |
| from app.vector_db import get_vector_db | |
| from app.gemini_client import get_gemini_client | |
| import requests | |
| import os | |
| from datetime import datetime | |
| import uuid | |
| import re | |
| import json | |
# Blueprint carrying the application's main routes.
main_bp = Blueprint('main', __name__)
def admin_required(f):
    """Decorator restricting a view to administrator accounts.

    Non-admin users receive a 403 JSON error on API paths ('/api/...'),
    or are flashed a message and redirected to the index page otherwise.
    """
    from functools import wraps

    # `wraps` was imported but never applied: without it every decorated
    # view is registered under the name 'decorated_function', which makes
    # Flask raise on duplicate endpoints when this decorator is used on
    # more than one view.
    @wraps(f)
    def decorated_function(*args, **kwargs):
        if not current_user.is_admin:
            # API requests get a JSON error instead of an HTML redirect.
            if request.path.startswith('/api/'):
                return jsonify({'error': '관리자 권한이 필요합니다.'}), 403
            flash('관리자 권한이 필요합니다.', 'error')
            return redirect(url_for('main.index'))
        return f(*args, **kwargs)
    return decorated_function
# Base URL of the local Ollama server (overridable via environment variable).
OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
# Upload settings: files are stored in 'uploads' one directory above this package.
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'uploads')
ALLOWED_EXTENSIONS = {'txt', 'md', 'pdf', 'docx', 'epub'}
# Print the upload-folder location at import time (debugging aid).
print(f"[업로드 설정] 업로드 폴더 경로: {UPLOAD_FOLDER}")
print(f"[업로드 설정] 업로드 폴더 존재 여부: {os.path.exists(UPLOAD_FOLDER)}")
def allowed_file(filename):
    """Return True when *filename* carries an allowed upload extension."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
def ensure_upload_folder():
    """Make sure the upload folder exists and is writable.

    Creates the directory on demand, then probes it with a throwaway
    marker file. Logs a traceback and re-raises on any failure.
    """
    try:
        if not os.path.exists(UPLOAD_FOLDER):
            print(f"업로드 폴더 생성 중: {UPLOAD_FOLDER}")
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            if not os.path.exists(UPLOAD_FOLDER):
                raise Exception(f'업로드 폴더를 생성할 수 없습니다: {UPLOAD_FOLDER}')
        # Probe write permission with a temporary marker file.
        probe_path = os.path.join(UPLOAD_FOLDER, '.write_test')
        try:
            with open(probe_path, 'w') as probe:
                probe.write('test')
            os.remove(probe_path)
            print(f"업로드 폴더 쓰기 권한 확인 완료: {UPLOAD_FOLDER}")
        except PermissionError as perm_err:
            raise Exception(f'업로드 폴더에 쓰기 권한이 없습니다: {UPLOAD_FOLDER} - {str(perm_err)}')
        except Exception as probe_err:
            raise Exception(f'업로드 폴더 쓰기 테스트 실패: {UPLOAD_FOLDER} - {str(probe_err)}')
    except Exception as err:
        print(f"업로드 폴더 생성 오류: {str(err)}")
        import traceback
        traceback.print_exc()
        raise
def split_text_into_chunks(text, min_chunk_size=200, max_chunk_size=1000, overlap=150):
    """Split *text* into semantically coherent chunks.

    Paragraphs (blank-line separated) are broken into sentences, and the
    sentences are regrouped into chunks between min_chunk_size and
    max_chunk_size characters, carrying a sentence-level overlap of up to
    *overlap* characters between consecutive chunks.
    """
    if not text or not text.strip():
        return []

    # Stage 1: paragraphs, separated by blank lines.
    paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text.strip()) if p.strip()]
    if not paragraphs:
        return []

    # Stage 2: sentences. Terminators (. ! ?) followed by whitespace or
    # end-of-string close a sentence; the terminator stays attached to it.
    terminator_re = r'([.!?]+)(?=\s+|$)'
    sentences = []
    for paragraph in paragraphs:
        pieces = re.split(terminator_re, paragraph)
        collected = []
        pending = ""
        for piece in pieces:
            if not piece.strip():
                continue
            pending += piece
            if re.match(r'^[.!?]+$', piece):
                # Terminator piece: close out the pending sentence.
                if pending.strip():
                    collected.append(pending.strip())
                pending = ""
        # Trailing text with no terminator still counts as a sentence.
        if pending.strip():
            collected.append(pending.strip())
        if not collected and paragraph.strip():
            # Paragraph with no terminators at all: keep it whole.
            collected.append(paragraph.strip())
        sentences.extend(collected)

    if not sentences:
        # Sentence splitting produced nothing usable: return the raw text.
        return [text] if text.strip() else []

    # Stage 3: pack sentences into chunks, carrying an overlap tail.
    chunks = []
    bucket = []       # sentences of the chunk currently being built
    bucket_len = 0    # running size, counting one newline per sentence
    for sentence in sentences:
        size = len(sentence)
        if bucket and bucket_len + size > max_chunk_size:
            joined = '\n'.join(bucket)
            if len(joined.strip()) >= min_chunk_size:
                chunks.append(joined)
            elif chunks:
                # Too small on its own: fold into the previous chunk.
                chunks[-1] = chunks[-1] + '\n' + joined
            else:
                chunks.append(joined)
            # Carry the last few sentences forward as overlap.
            tail = []
            tail_len = 0
            for prev in reversed(bucket):
                if tail_len + len(prev) > overlap:
                    break
                tail.insert(0, prev)
                tail_len += len(prev) + 1  # +1 for the joining newline
            bucket = tail + [sentence]
            bucket_len = tail_len + size
        else:
            bucket.append(sentence)
            bucket_len += size + 1  # +1 for the joining newline

    # Flush the final chunk.
    if bucket:
        joined = '\n'.join(bucket)
        if chunks and len(joined.strip()) < min_chunk_size:
            chunks[-1] = chunks[-1] + '\n' + joined
        else:
            chunks.append(joined)

    # Final pass: strip, drop empties, merge undersized chunks backwards.
    result = []
    for chunk in chunks:
        chunk = chunk.strip()
        if not chunk:
            continue
        if len(chunk) >= min_chunk_size or not result:
            result.append(chunk)
        else:
            result[-1] = result[-1] + '\n' + chunk
    return result if result else ([text] if text.strip() else [])
def extract_chapter_number(text):
    """Extract a chapter number from *text* and return it as an int.

    Only the first 500 characters are scanned, since chapter headings
    normally appear at the start of a section. Returns None when no
    pattern matches.
    """
    # Patterns are tried in order and matched case-insensitively, so a
    # single casing of each suffices; the original list carried redundant
    # upper-case 'CHAPTER' duplicates that could never change the result.
    patterns = [
        r'제\s*(\d+)\s*장',    # 제1장, 제 1 장
        r'제\s*(\d+)\s*화',    # 제1화
        r'Chapter\s*(\d+)',    # Chapter 1 / CHAPTER 1 (IGNORECASE)
        r'Ch\.\s*(\d+)',       # Ch. 1
        r'(\d+)\s*장',         # 1장
        r'(\d+)\s*화',         # 1화
        r'chap\.\s*(\d+)',     # chap. 1
        r'ch\s*(\d+)',         # ch 1
        r'(\d+)\s*章',         # 1章 (CJK chapter marker)
    ]
    search_text = text[:500]
    for pattern in patterns:
        match = re.search(pattern, search_text, re.IGNORECASE)
        if match:
            try:
                return int(match.group(1))
            except ValueError:
                # \d+ should always parse; narrowed from a bare except so
                # unrelated failures are not silently swallowed.
                continue
    return None
def split_content_by_episodes(content):
    """Split a web-novel source on '#작품설명' / '#1화' / '#2화' style headers.

    Returns:
        list: [(section_type, section_title, section_content, metadata), ...]
              section_type: '작품설명' or '화' ('기타' for the fallback)
              section_title: '작품설명' or '1화', '2화', ...
              metadata: {'chapter': '#작품설명'} or {'chapter': '1화'} etc.
    """
    if not content or not content.strip():
        return []

    header_re = re.compile(r'^#\s*(작품설명|\d+화)')
    sections = []
    kind = None      # type of the section being collected
    title = None     # title of the section being collected
    collected = []   # raw lines of the section (header line included)

    def _flush():
        # Append the section gathered so far, skipping empty bodies.
        if kind and collected:
            body = '\n'.join(collected).strip()
            if body:
                meta = {'chapter': '#작품설명' if kind == '작품설명' else title}
                sections.append((kind, title, body, meta))

    for line in content.split('\n'):
        header = header_re.match(line.strip())
        if header:
            _flush()
            label = header.group(1)
            if label == '작품설명':
                kind, title = '작품설명', '작품설명'
            else:
                kind, title = '화', label
            collected = [line]  # keep the header line inside the section
        else:
            # Lines before the first header accumulate here but are never
            # flushed, matching the original behavior.
            collected.append(line)
    _flush()

    # No headers found: treat the whole text as a single fallback section.
    if not sections:
        sections.append(('기타', '전체', content.strip(), {'chapter': None}))
    return sections
def extract_metadata_with_ai(chunk_content, full_content=None, parent_chunk=None, model_name=None):
    """Use an AI model to extract chunk metadata (POV, characters, time setting, relationships).

    Args:
        chunk_content: chunk text to analyze
        full_content: full novel text (context for character relationships)
        parent_chunk: ParentChunk object (optional; accepted but not used in the prompt here)
        model_name: AI model name; when None, Gemini is tried with a default model

    Returns:
        dict with 'pov', 'characters', 'time_background',
        'character_relationships' keys; all-default values when every AI
        path fails.
    """
    try:
        # Build a preview of the whole novel so the model can infer
        # character relationships from context.
        full_content_preview = ""
        if full_content:
            # Very long source: keep head and tail only (max 20000 chars).
            if len(full_content) > 20000:
                full_content_preview = full_content[:10000] + "\n... (중간 생략) ...\n" + full_content[-10000:]
            else:
                full_content_preview = full_content
        # Prompt requesting a JSON-only answer.
        prompt = f"""다음 웹소설 텍스트를 분석하여 아래 정보를 JSON 형식으로만 응답하세요.
원본 웹소설 전체 내용 (참고용):
{full_content_preview[:50000] if full_content_preview else "없음"}
분석할 청크 텍스트:
{chunk_content[:2000]}
다음 형식으로만 응답하세요 (JSON 형식):
{{
    "pov": "화자/시점을 설명하세요 (예: 1인칭 주인공, 3인칭 전지적 작가 등)",
    "characters": ["등장인물1", "등장인물2"],
    "time_background": "시간적 배경 설명 (예: 과거 회상, 현재 시점, 미래 등)",
    "character_relationships": [
        {{
            "character1": "인물1",
            "character2": "인물2",
            "relationship": "현재 시점에서의 관계 설명 (예: 연인, 적, 친구, 가족 등)"
        }}
    ]
}}
character_relationships는 이 청크에 등장하는 인물들 간의 현재 관계를 원본 웹소설 전체 내용을 참고하여 파악한 것입니다.
응답은 오직 JSON 형식만 사용하고, 다른 설명은 포함하지 마세요."""
        # No model specified: prefer Gemini with a default model.
        if not model_name:
            # Try Gemini first.
            try:
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name="gemini-1.5-flash",
                        temperature=0.3,
                        max_output_tokens=500
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        # Extract the JSON object from the free-form reply.
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            except:
                pass
        # A model was specified (or the default Gemini attempt fell through).
        if model_name:
            model_name_lower = model_name.lower().strip()
            is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
            if is_gemini:
                # Strip the optional "gemini:" prefix (case-insensitive).
                gemini_model_name = model_name.strip()
                if gemini_model_name.lower().startswith('gemini:'):
                    gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
                gemini_client = get_gemini_client()
                if gemini_client.is_configured():
                    result = gemini_client.generate_response(
                        prompt=prompt,
                        model_name=gemini_model_name,
                        temperature=0.3,
                        max_output_tokens=500
                    )
                    if not result['error'] and result.get('response'):
                        response_text = result['response'].strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
            else:
                # Ollama API call.
                try:
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model_name,
                            'prompt': prompt,
                            'stream': False,
                            'options': {
                                'temperature': 0.3,
                                'num_predict': 500
                            }
                        },
                        timeout=30
                    )
                    if ollama_response.status_code == 200:
                        response_data = ollama_response.json()
                        response_text = response_data.get('response', '').strip()
                        json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                        if json_match:
                            metadata = json.loads(json_match.group(0))
                            return metadata
                except:
                    pass
        # Every AI path failed: return default (empty) metadata.
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
    except Exception as e:
        print(f"[메타데이터 추출] 오류: {str(e)}")
        return {
            "pov": None,
            "characters": [],
            "time_background": None,
            "character_relationships": []
        }
def extract_chunk_metadata(chunk_content, full_content=None, chunk_index=None, file_id=None, model_name=None):
    """Extract metadata for a chunk (POV, characters, time setting, relationships).

    Args:
        chunk_content: chunk text to analyze
        full_content: full novel text (for relationship inference)
        chunk_index: index of the chunk (currently informational only)
        file_id: owning file's ID, used to look up its ParentChunk
        model_name: AI model name to use

    Returns:
        dict with 'pov', 'characters', 'time_background',
        'character_relationships' keys (safe defaults when the AI fails).
    """
    metadata = {
        "pov": None,
        "characters": [],
        "time_background": None,
        "character_relationships": []
    }
    # Look up the file's ParentChunk for extra context, best-effort.
    parent_chunk = None
    if file_id:
        try:
            parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        except Exception:
            # Narrowed from a bare `except:`: a DB hiccup here must not
            # abort metadata extraction, but KeyboardInterrupt/SystemExit
            # should still propagate.
            pass
    # Delegate the actual analysis to the AI helper, passing the full
    # novel text so relationships can be judged in context.
    ai_metadata = extract_metadata_with_ai(chunk_content, full_content, parent_chunk, model_name)
    if ai_metadata:
        metadata["pov"] = ai_metadata.get("pov")
        metadata["characters"] = ai_metadata.get("characters", [])
        metadata["time_background"] = ai_metadata.get("time_background")
        metadata["character_relationships"] = ai_metadata.get("character_relationships", [])
    return metadata
def create_chunks_for_file(file_id, content):
    """Split file content into sections and store semantic chunks (DB + vector DB).

    Section split rules:
      - '#작품설명' up to '#1화': a '작품설명' section; metadata chapter is '#작품설명'
      - '#n화' up to '#n+1화': an 'n화' section; metadata chapter is 'n화'

    Args:
        file_id: ID of the uploaded file
        content: full text of the file

    Returns:
        int: number of chunks saved (0 on error).
    """
    try:
        print(f"[청크 생성] 파일 ID {file_id}에 대한 청크 생성 시작")
        print(f"[청크 생성] 원본 텍스트 길이: {len(content)}자")
        # File record (for the model name associated with the upload).
        uploaded_file = UploadedFile.query.get(file_id)
        model_name = uploaded_file.model_name if uploaded_file else None
        # Vector DB manager.
        vector_db = get_vector_db()
        # Remove any existing chunks for this file (relational DB + vector DB).
        existing_chunks = DocumentChunk.query.filter_by(file_id=file_id).all()
        if existing_chunks:
            print(f"[청크 생성] 기존 청크 {len(existing_chunks)}개 삭제 중...")
            # Delete from the vector DB first, then the relational rows.
            vector_db.delete_chunks_by_file_id(file_id)
            DocumentChunk.query.filter_by(file_id=file_id).delete()
            db.session.commit()
        # Split the novel into sections (#작품설명, #1화, #2화, ...).
        sections = split_content_by_episodes(content)
        print(f"[청크 생성] 섹션 분할 완료: {len(sections)}개 섹션")
        for i, (section_type, section_title, section_content, section_metadata) in enumerate(sections):
            print(f"[청크 생성] 섹션 {i+1}: {section_title} ({len(section_content)}자)")
        if len(sections) == 0:
            print(f"[청크 생성] 경고: 섹션이 생성되지 않았습니다.")
            return 0
        # Chunk each section and persist to both stores.
        saved_count = 0
        vector_saved_count = 0
        global_chunk_index = 0  # chunk index running across all sections
        for section_idx, (section_type, section_title, section_content, section_metadata) in enumerate(sections):
            print(f"[청크 생성] 섹션 '{section_title}' 처리 중... ({len(section_content)}자)")
            # Semantic chunking within the section (sentence/paragraph aware):
            # min 200 / max 1000 chars with a 150-char overlap.
            section_chunks = split_text_into_chunks(section_content, min_chunk_size=200, max_chunk_size=1000, overlap=150)
            print(f"[청크 생성] 섹션 '{section_title}' 분할된 청크 수: {len(section_chunks)}개")
            # Persist each chunk to the relational DB and the vector DB.
            for chunk_idx, chunk_content in enumerate(section_chunks):
                try:
                    # Start from the section metadata (carries chapter info);
                    # copy so chunks don't share a mutable dict.
                    chunk_metadata = section_metadata.copy()
                    # Relational row, with the section metadata serialized.
                    chunk = DocumentChunk(
                        file_id=file_id,
                        chunk_index=global_chunk_index,
                        content=chunk_content,
                        chunk_metadata=json.dumps(chunk_metadata, ensure_ascii=False)
                    )
                    db.session.add(chunk)
                    db.session.flush()  # assigns chunk.id before the vector insert
                    # Mirror the chunk into the vector DB.
                    if vector_db.add_chunk(
                        chunk_id=chunk.id,
                        chunk_content=chunk_content,
                        file_id=file_id,
                        chunk_index=global_chunk_index
                    ):
                        vector_saved_count += 1
                    saved_count += 1
                    global_chunk_index += 1
                    # Progress log every 10 chunks.
                    if saved_count % 10 == 0:
                        print(f"[청크 생성] 진행 중: {saved_count}개 청크 저장 중... (DB: {saved_count}, 벡터 DB: {vector_saved_count})")
                except Exception as e:
                    # One bad chunk should not abort the whole import.
                    print(f"[청크 생성] 경고: 청크 {global_chunk_index} 저장 중 오류: {str(e)}")
                    import traceback
                    traceback.print_exc()
                    continue
        db.session.commit()
        print(f"[청크 생성] 완료: {saved_count}개 청크가 데이터베이스에 저장되었습니다. (벡터 DB: {vector_saved_count}개)")
        # Verify the persisted count matches what we believe we saved.
        verified_count = DocumentChunk.query.filter_by(file_id=file_id).count()
        if verified_count != saved_count:
            print(f"[청크 생성] 경고: 저장된 청크 수({saved_count})와 확인된 청크 수({verified_count})가 일치하지 않습니다.")
        else:
            print(f"[청크 생성] 검증 완료: {verified_count}개 청크가 정상적으로 저장되었습니다.")
        return saved_count
    except Exception as e:
        db.session.rollback()
        print(f"[청크 생성] 오류: {str(e)}")
        import traceback
        traceback.print_exc()
        return 0
def create_parent_chunk_with_ai(file_id, content, model_name):
    """Create a Parent Chunk by having an AI model analyze the whole novel.

    The model produces a structured analysis (world view, characters,
    story, episodes, misc); the reply is parsed by section headers and
    stored as a ParentChunk row, replacing any existing one for the file.

    Args:
        file_id: ID of the uploaded file
        content: full text of the file
        model_name: 'gemini:...' / 'gemini-...' routes to Gemini,
            anything else is treated as an Ollama model name

    Returns:
        ParentChunk | None: the saved row, or None on any failure.
    """
    try:
        print(f"[Parent Chunk 생성] 파일 ID {file_id}에 대한 Parent Chunk 생성 시작")
        print(f"[Parent Chunk 생성] 사용 모델: {model_name}")
        print(f"[Parent Chunk 생성] 원본 텍스트 길이: {len(content)}자")
        # A model name is mandatory (None or blank aborts).
        if not model_name or not model_name.strip():
            print(f"[Parent Chunk 생성] ❌ 오류: 모델명이 제공되지 않았습니다.")
            return None
        # Truncate very long texts (max 50000 chars) before analysis.
        content_preview = content[:50000] if len(content) > 50000 else content
        if len(content) > 50000:
            print(f"[Parent Chunk 생성] 텍스트가 길어 일부만 사용: {len(content_preview)}자 (전체: {len(content)}자)")
        # Analysis prompt: asks for five '##'-headed sections.
        analysis_prompt = f"""다음 웹소설 텍스트를 분석하여 다음 항목들을 작성해주세요. 각 항목은 명확하고 구체적으로 작성해주세요.
텍스트 내용:
{content_preview}
위 텍스트를 분석하여 다음 형식으로 답변해주세요:
## 세계관 설명
[세계관에 대한 자세한 설명을 작성하세요. 배경, 설정, 규칙 등을 포함하세요.]
## 주요 캐릭터 분석
[주요 등장인물들의 이름, 역할, 성격, 특징 등을 분석하여 작성하세요. 각 캐릭터별로 구분하여 작성하세요.]
## 주요 스토리 분석
[전체적인 스토리 흐름, 주요 사건, 갈등 구조 등을 분석하여 작성하세요.]
## 주요 에피소드 분석
[중요한 에피소드나 챕터별 주요 내용을 분석하여 작성하세요. 시간 순서대로 정리하면 좋습니다.]
## 기타
[위 카테고리에 포함되지 않지만 중요한 정보나 특징 등을 작성하세요.]
각 항목을 명확하게 구분하여 작성해주세요."""
        # Decide between Gemini and Ollama from the model-name prefix.
        # Gemini names look like "gemini:<model>" or "gemini-1.5-flash".
        model_name_lower = model_name.lower().strip()
        is_gemini = model_name_lower.startswith('gemini:') or model_name_lower.startswith('gemini-')
        print(f"[Parent Chunk 생성] 모델 타입 확인: is_gemini={is_gemini}, model_name={model_name}")
        if is_gemini:
            # Gemini path.
            # Strip the optional "gemini:" prefix (case-insensitive).
            gemini_model_name = model_name.strip()
            if gemini_model_name.lower().startswith('gemini:'):
                gemini_model_name = gemini_model_name.split(':', 1)[1].strip()
            # Names like "gemini-1.5-flash" are used as-is.
            print(f"[Parent Chunk 생성] Gemini API에 분석 요청 전송 중... (모델: {gemini_model_name})")
            print(f"[Parent Chunk 생성] 원본 모델명: {model_name} -> Gemini 모델명: {gemini_model_name}")
            gemini_client = get_gemini_client()
            if not gemini_client.is_configured():
                print(f"[Parent Chunk 생성] ❌ 오류: Gemini API 키가 설정되지 않았습니다.")
                print(f"[Parent Chunk 생성] 디버그: Gemini 클라이언트 상태 확인 중...")
                # Re-check the stored API key to aid debugging.
                from app.gemini_client import get_gemini_api_key
                api_key = get_gemini_api_key()
                if api_key:
                    print(f"[Parent Chunk 생성] 디버그: API 키는 존재하지만 클라이언트가 설정되지 않았습니다. (길이: {len(api_key)})")
                else:
                    print(f"[Parent Chunk 생성] 디버그: API 키가 데이터베이스에 없습니다.")
                return None
            print(f"[Parent Chunk 생성] Gemini API 키 확인 완료. API 호출 시작...")
            result = gemini_client.generate_response(
                prompt=analysis_prompt,
                model_name=gemini_model_name,
                temperature=0.7,
                max_output_tokens=8192
            )
            if result['error']:
                print(f"[Parent Chunk 생성] ❌ 오류: Gemini API 호출 실패 - {result['error']}")
                print(f"[Parent Chunk 생성] 디버그: result 객체 내용: {result}")
                return None
            if not result.get('response'):
                print(f"[Parent Chunk 생성] ❌ 오류: Gemini API 응답이 비어있습니다.")
                print(f"[Parent Chunk 생성] 디버그: result 객체 내용: {result}")
                return None
            analysis_result = result['response']
            print(f"[Parent Chunk 생성] Gemini API 응답 수신 성공: {len(analysis_result)}자")
        else:
            # Ollama path.
            print(f"[Parent Chunk 생성] Ollama API에 분석 요청 전송 중... (모델: {model_name})")
            try:
                ollama_response = requests.post(
                    f'{OLLAMA_BASE_URL}/api/chat',
                    json={
                        'model': model_name,
                        'messages': [
                            {
                                'role': 'user',
                                'content': analysis_prompt
                            }
                        ],
                        'stream': False
                    },
                    timeout=300  # 5-minute timeout for long analyses
                )
                if ollama_response.status_code != 200:
                    error_detail = ollama_response.text if ollama_response.text else '상세 정보 없음'
                    if ollama_response.status_code == 404:
                        # 404 almost always means the model is not installed.
                        error_msg = f'Ollama API 오류 404: 모델 "{model_name}"을(를) 찾을 수 없습니다. 모델이 Ollama에 설치되어 있는지 확인하세요.'
                        print(f"[Parent Chunk 생성] ❌ 오류: {error_msg}")
                        print(f"[Parent Chunk 생성] 디버그: 만약 Gemini 모델을 사용하려면 모델명이 'gemini:' 또는 'gemini-'로 시작해야 합니다.")
                    else:
                        error_msg = f'Ollama API 오류: {ollama_response.status_code} - {error_detail[:200]}'
                        print(f"[Parent Chunk 생성] ❌ 오류: {error_msg}")
                    return None
                response_data = ollama_response.json()
                analysis_result = response_data.get('message', {}).get('content', '')
                print(f"[Parent Chunk 생성] Ollama API 응답 수신 성공: {len(analysis_result)}자")
            except requests.exceptions.RequestException as e:
                print(f"[Parent Chunk 생성] ❌ Ollama API 연결 오류: {str(e)}")
                print(f"[Parent Chunk 생성] 디버그: Ollama URL: {OLLAMA_BASE_URL}")
                raise
        if not analysis_result:
            print(f"[Parent Chunk 생성] ⚠️ 경고: 분석 결과가 비어있습니다.")
            return None
        print(f"[Parent Chunk 생성] 분석 결과 수신 완료: {len(analysis_result)}자")
        # Parse the analysis into its five sections.
        world_view = ""
        characters = ""
        story = ""
        episodes = ""
        others = ""
        # Accepted header spellings for each section key.
        sections = {
            'world_view': ['## 세계관 설명', '## 세계관', '세계관 설명'],
            'characters': ['## 주요 캐릭터 분석', '## 주요 캐릭터', '주요 캐릭터 분석', '## 캐릭터'],
            'story': ['## 주요 스토리 분석', '## 주요 스토리', '주요 스토리 분석', '## 스토리'],
            'episodes': ['## 주요 에피소드 분석', '## 주요 에피소드', '주요 에피소드 분석', '## 에피소드'],
            'others': ['## 기타', '기타']
        }
        lines = analysis_result.split('\n')
        current_section = None
        current_content = []
        for line in lines:
            line_stripped = line.strip()
            # Is this line one of the known section headers?
            section_found = False
            for section_key, section_headers in sections.items():
                for header in section_headers:
                    if header in line_stripped:
                        # Save the previous section's accumulated text.
                        if current_section:
                            if current_section == 'world_view':
                                world_view = '\n'.join(current_content).strip()
                            elif current_section == 'characters':
                                characters = '\n'.join(current_content).strip()
                            elif current_section == 'story':
                                story = '\n'.join(current_content).strip()
                            elif current_section == 'episodes':
                                episodes = '\n'.join(current_content).strip()
                            elif current_section == 'others':
                                others = '\n'.join(current_content).strip()
                        current_section = section_key
                        current_content = []
                        section_found = True
                        break
                if section_found:
                    break
            if not section_found and current_section:
                # Body line for the current section ('#'-prefixed lines skipped).
                if line_stripped and not line_stripped.startswith('#'):
                    current_content.append(line)
        # Save the final section.
        if current_section:
            if current_section == 'world_view':
                world_view = '\n'.join(current_content).strip()
            elif current_section == 'characters':
                characters = '\n'.join(current_content).strip()
            elif current_section == 'story':
                story = '\n'.join(current_content).strip()
            elif current_section == 'episodes':
                episodes = '\n'.join(current_content).strip()
            elif current_section == 'others':
                others = '\n'.join(current_content).strip()
        # Parsing found nothing: stash the entire reply under "others".
        if not world_view and not characters and not story and not episodes:
            print(f"[Parent Chunk 생성] 경고: 섹션 파싱 실패. 전체 내용을 '기타'에 저장합니다.")
            others = analysis_result.strip()
        # Replace any existing Parent Chunk for this file.
        existing_parent = ParentChunk.query.filter_by(file_id=file_id).first()
        if existing_parent:
            db.session.delete(existing_parent)
            db.session.commit()
            print(f"[Parent Chunk 생성] 기존 Parent Chunk 삭제 완료")
        # Persist the new Parent Chunk (empty sections stored as NULL).
        parent_chunk = ParentChunk(
            file_id=file_id,
            world_view=world_view if world_view else None,
            characters=characters if characters else None,
            story=story if story else None,
            episodes=episodes if episodes else None,
            others=others if others else None
        )
        db.session.add(parent_chunk)
        db.session.commit()
        print(f"[Parent Chunk 생성] ✅ 완료: Parent Chunk가 생성되었습니다.")
        print(f"[Parent Chunk 생성] - 세계관: {len(world_view)}자")
        print(f"[Parent Chunk 생성] - 캐릭터: {len(characters)}자")
        print(f"[Parent Chunk 생성] - 스토리: {len(story)}자")
        print(f"[Parent Chunk 생성] - 에피소드: {len(episodes)}자")
        print(f"[Parent Chunk 생성] - 기타: {len(others)}자")
        return parent_chunk
    except requests.exceptions.RequestException as e:
        # Re-raised from the Ollama branch above.
        error_msg = f'Ollama API 연결 오류: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 오류: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
    except Exception as e:
        db.session.rollback()
        error_msg = f'Parent Chunk 생성 중 오류: {str(e)}'
        print(f"[Parent Chunk 생성] ❌ 오류: {error_msg}")
        import traceback
        traceback.print_exc()
        return None
def get_parent_chunks_for_files(file_ids):
    """Look up the ParentChunk (if any) for each file ID, for context building."""
    try:
        if not file_ids:
            return []
        # One lookup per file; files without a parent chunk are skipped.
        return [
            chunk
            for chunk in (
                ParentChunk.query.filter_by(file_id=fid).first()
                for fid in file_ids
            )
            if chunk
        ]
    except Exception as e:
        print(f"[Parent Chunk 조회] 오류: {str(e)}")
        return []
def search_relevant_chunks(query, file_ids=None, model_name=None, top_k=5, min_score=1):
    """
    Search chunks relevant to *query* (vector search + re-ranking).

    1. Vector search retrieves an initial 30 candidate documents.
    2. A Cross-Encoder re-ranks the candidates.
    3. The top *top_k* chunks are returned (default 5).

    Falls back to keyword search (search_relevant_chunks_fallback) when
    vector search yields nothing or raises; *min_score* is only used by
    that fallback.
    """
    try:
        # Vector DB manager.
        vector_db = get_vector_db()
        # Expand file IDs to include continuation uploads of the selection.
        expanded_file_ids = None
        if file_ids:
            expanded_file_ids = list(file_ids)
            for file_id in file_ids:
                # Include children of each selected file.
                child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                expanded_file_ids.extend([child.id for child in child_files])
            # For selected root files, include their continuation uploads too.
            # NOTE(review): this repeats the loop above for root files, so
            # child IDs can appear twice in expanded_file_ids — apparently
            # harmless for filtering, but verify against vector_db.search_chunks.
            parent_files = UploadedFile.query.filter(UploadedFile.id.in_(file_ids), UploadedFile.parent_file_id.is_(None)).all()
            for parent_file in parent_files:
                child_files = UploadedFile.query.filter_by(parent_file_id=parent_file.id).all()
                expanded_file_ids.extend([child.id for child in child_files])
        # Optional filtering by the model the files were uploaded for.
        if model_name and expanded_file_ids:
            filtered_files = UploadedFile.query.filter(
                UploadedFile.id.in_(expanded_file_ids),
                UploadedFile.model_name == model_name
            ).all()
            expanded_file_ids = [f.id for f in filtered_files]
        elif model_name and not expanded_file_ids:
            # No file IDs given: filter by model name alone.
            filtered_files = UploadedFile.query.filter_by(model_name=model_name).all()
            expanded_file_ids = [f.id for f in filtered_files]
        # Step 1: vector search for the initial 30 documents.
        print(f"[벡터 검색] 쿼리: {query[:50]}..., 파일 ID: {expanded_file_ids if expanded_file_ids else '모든 파일'}")
        vector_results = vector_db.search_chunks(
            query=query,
            file_ids=expanded_file_ids,
            top_k=30
        )
        if not vector_results:
            print(f"[벡터 검색] 결과 없음, 키워드 기반 검색으로 대체")
            # No vector hits: fall back to the keyword-based search.
            return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
        # Step 2: Cross-Encoder re-ranking.
        print(f"[리랭킹] {len(vector_results)}개 청크에 대한 리랭킹 시작...")
        reranked_chunks = vector_db.rerank_chunks(
            query=query,
            chunks=vector_results,
            top_k=top_k
        )
        # Step 3: resolve the re-ranked IDs back to DocumentChunk rows.
        final_chunks = []
        for reranked in reranked_chunks:
            chunk_id = reranked['chunk_id']
            chunk = DocumentChunk.query.get(chunk_id)
            if chunk:
                final_chunks.append(chunk)
        print(f"[벡터 검색 + 리랭킹] 최종 {len(final_chunks)}개 청크 반환")
        return final_chunks
    except Exception as e:
        print(f"[벡터 검색] 오류: {str(e)}")
        import traceback
        traceback.print_exc()
        # Any failure degrades gracefully to the keyword-based search.
        print(f"[벡터 검색] 키워드 기반 검색으로 대체")
        return search_relevant_chunks_fallback(query, file_ids, model_name, top_k, min_score)
def search_relevant_chunks_fallback(query, file_ids=None, model_name=None, top_k=25, min_score=1):
    """Keyword-overlap fallback search, used when vector search is unavailable.

    Tokenizes the query, scores every candidate chunk by word overlap,
    term frequency, and overlap ratio, and returns up to top_k
    DocumentChunk objects whose score is at least min_score.
    """
    try:
        # Tokenize the query: Hangul runs and word characters, lowercased.
        query_tokens = set(re.findall(r'[κ°-ν£]+|\w+', query.lower()))
        if not query_tokens:
            return []
        # Base chunk query, joined to the owning file so we can filter on it.
        chunk_query = DocumentChunk.query.join(UploadedFile)
        if file_ids:
            # Expand the selection with continuation uploads of each file.
            expanded_ids = list(file_ids)
            for fid in file_ids:
                expanded_ids.extend(
                    child.id
                    for child in UploadedFile.query.filter_by(parent_file_id=fid).all()
                )
            # For selected root files (no parent), also pull in their
            # continuation uploads.
            root_files = UploadedFile.query.filter(
                UploadedFile.id.in_(file_ids),
                UploadedFile.parent_file_id.is_(None)
            ).all()
            for root in root_files:
                expanded_ids.extend(
                    child.id
                    for child in UploadedFile.query.filter_by(parent_file_id=root.id).all()
                )
            chunk_query = chunk_query.filter(UploadedFile.id.in_(expanded_ids))
        if model_name:
            chunk_query = chunk_query.filter(UploadedFile.model_name == model_name)
        candidates = chunk_query.all()
        if not candidates:
            return []
        # Score each chunk with three weighted components.
        ranked = []
        for chunk in candidates:
            text = chunk.content.lower()
            chunk_tokens = set(re.findall(r'[κ°-ν£]+|\w+', text))
            # 1. Overlap count between query and chunk vocabularies.
            overlap = query_tokens & chunk_tokens
            base = len(overlap)
            # 2. Term frequency: total occurrences of query tokens in the text.
            freq = sum(text.count(token) for token in query_tokens)
            # 3. Share of the chunk's vocabulary covered by query tokens.
            ratio = (len(overlap) / len(chunk_tokens) * 10) if chunk_tokens else 0
            score = base * 2 + freq * 0.5 + ratio
            if score >= min_score:
                ranked.append((score, chunk))
        # Highest score first; keep at most top_k chunks.
        ranked.sort(key=lambda pair: pair[0], reverse=True)
        return [chunk for _, chunk in ranked[:top_k]]
    except Exception as e:
        print(f"[ν€μλ κ²μ] μ€λ₯: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
def login():
    """Login page.

    GET renders the form; POST validates credentials, records last_login,
    and redirects: admins to the admin page, everyone else to the index.
    An optional relative ``next`` query parameter overrides the target.
    """
    if current_user.is_authenticated:
        # Already signed in: admins go to the admin page, others home.
        if current_user.is_admin:
            return redirect(url_for('main.admin'))
        return redirect(url_for('main.index'))
    if request.method == 'POST':
        username = request.form.get('username', '').strip()
        password = request.form.get('password', '')
        if not username or not password:
            flash('μ¬μ©μλͺ κ³Ό λΉλ°λ²νΈλ₯Ό μ λ ₯ν΄μ£ΌμΈμ.', 'error')
            return render_template('login.html')
        user = User.query.filter_by(username=username).first()
        # Inactive accounts are rejected with the same generic message.
        if user and user.check_password(password) and user.is_active:
            login_user(user)
            user.last_login = datetime.utcnow()
            db.session.commit()
            next_page = request.args.get('next')
            # Security fix: only honor same-site relative targets. Previously
            # any value was redirected to, allowing open redirects such as
            # ?next=http://evil.com or ?next=//evil.com.
            if next_page and (not next_page.startswith('/') or next_page.startswith('//')):
                next_page = None
            if user.is_admin:
                return redirect(next_page) if next_page else redirect(url_for('main.admin'))
            return redirect(next_page) if next_page else redirect(url_for('main.index'))
        else:
            flash('μ¬μ©μλͺ λλ λΉλ°λ²νΈκ° μ¬λ°λ₯΄μ§ μμ΅λλ€.', 'error')
    return render_template('login.html')
def logout():
    """End the current user's session and return to the login page."""
    logout_user()
    flash('λ‘κ·Έμμλμμ΅λλ€.', 'info')
    login_page = url_for('main.login')
    return redirect(login_page)
def index():
    """Render the main chat page."""
    return render_template('index.html')
def webnovels():
    """Render the page listing uploaded webnovels."""
    return render_template('webnovels.html')
def admin():
    """Render the admin page with every user, newest first."""
    all_users = User.query.order_by(User.created_at.desc()).all()
    return render_template('admin.html', users=all_users)
def admin_messages():
    """Render the admin page for browsing chat messages."""
    return render_template('admin_messages.html')
def admin_webnovels():
    """Render the admin page for managing webnovels."""
    return render_template('admin_webnovels.html')
def admin_prompts():
    """Render the admin page for managing prompts."""
    return render_template('admin_prompts.html')
def admin_files():
    """Render the admin page for managing uploaded files."""
    return render_template('admin_files.html')
def get_users():
    """User list API: return every user as JSON, newest first."""
    try:
        ordered_users = User.query.order_by(User.created_at.desc()).all()
        payload = {'users': [u.to_dict() for u in ordered_users]}
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'μ¬μ©μ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def create_user():
    """User creation API.

    Expects JSON {'username': str, 'password': str, 'nickname': str?,
    'is_admin': bool?}. Returns the created user's dict on success.
    """
    try:
        data = request.json
        # Fix: request.json is None when the body is missing or not JSON;
        # previously data.get(...) raised AttributeError and surfaced as a
        # 500. Return a 400 instead (consistent with set_gemini_api_key).
        if not data:
            return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400
        username = data.get('username', '').strip()
        nickname = data.get('nickname', '').strip()
        password = data.get('password', '')
        is_admin = data.get('is_admin', False)
        if not username or not password:
            return jsonify({'error': 'μ¬μ©μλͺ κ³Ό λΉλ°λ²νΈλ₯Ό μ λ ₯ν΄μ£ΌμΈμ.'}), 400
        # Usernames must be unique.
        if User.query.filter_by(username=username).first():
            return jsonify({'error': 'μ΄λ―Έ μ‘΄μ¬νλ μ¬μ©μλͺ μ λλ€.'}), 400
        user = User(username=username, nickname=nickname if nickname else None, is_admin=is_admin, is_active=True)
        user.set_password(password)  # stores only a hash of the password
        db.session.add(user)
        db.session.commit()
        return jsonify({
            'message': 'μ¬μ©μκ° μ±κ³΅μ μΌλ‘ μμ±λμμ΅λλ€.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ¬μ©μ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def update_user(user_id):
    """User update API.

    Applies only the keys present in the JSON payload: username, nickname,
    password, is_admin, is_active. Returns the updated user's dict.
    """
    try:
        user = User.query.get_or_404(user_id)
        data = request.json
        # Fix: a missing/non-JSON body makes request.json None, which
        # previously raised AttributeError and surfaced as a 500.
        if not data:
            return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400
        # Prevent an admin from stripping their own admin rights.
        if user_id == current_user.id and data.get('is_admin') == False:
            return jsonify({'error': 'μκΈ° μμ μ κ΄λ¦¬μ κΆνμ μ κ±°ν μ μμ΅λλ€.'}), 400
        if 'username' in data:
            new_username = (data['username'] or '').strip()
            # Fix: an empty username was previously accepted silently,
            # which would blank the account's login name.
            if not new_username:
                return jsonify({'error': 'μ¬μ©μλͺ μ λΉμΈ μ μμ΅λλ€.'}), 400
            if new_username != user.username:
                if User.query.filter_by(username=new_username).first():
                    return jsonify({'error': 'μ΄λ―Έ μ‘΄μ¬νλ μ¬μ©μλͺ μ λλ€.'}), 400
                user.username = new_username
        if 'nickname' in data:
            user.nickname = data['nickname'].strip() if data['nickname'] else None
        # Only change the password when a non-empty value was supplied.
        if 'password' in data and data['password']:
            user.set_password(data['password'])
        if 'is_admin' in data:
            user.is_admin = data['is_admin']
        if 'is_active' in data:
            user.is_active = data['is_active']
        db.session.commit()
        return jsonify({
            'message': 'μ¬μ©μ μ λ³΄κ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.',
            'user': user.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ¬μ©μ μ 보 μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_all_messages():
    """Admin endpoint: paginated listing of every chat message.

    Optional query params: user_id and session_id (filters), page,
    per_page (pagination, defaults 1 / 50). Newest messages first.
    """
    try:
        user_id = request.args.get('user_id', type=int)
        session_id = request.args.get('session_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        # Join to the session so messages can be filtered by its owner.
        msg_query = ChatMessage.query.join(ChatSession)
        if user_id:
            msg_query = msg_query.filter(ChatSession.user_id == user_id)
        if session_id:
            msg_query = msg_query.filter(ChatMessage.session_id == session_id)
        page_obj = msg_query.order_by(ChatMessage.created_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False
        )
        return jsonify({
            'messages': [m.to_dict() for m in page_obj.items],
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λ©μμ§ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_all_sessions():
    """Admin endpoint: paginated listing of every chat session.

    Optional query params: user_id (filter), page, per_page. Each session
    dict is augmented with the owning user's username and nickname.
    """
    try:
        user_id = request.args.get('user_id', type=int)
        page = request.args.get('page', 1, type=int)
        per_page = request.args.get('per_page', 50, type=int)
        session_query = ChatSession.query
        if user_id:
            session_query = session_query.filter(ChatSession.user_id == user_id)
        page_obj = session_query.order_by(ChatSession.updated_at.desc()).paginate(
            page=page, per_page=per_page, error_out=False
        )
        sessions_data = []
        for sess in page_obj.items:
            entry = sess.to_dict()
            # The owning user row may have been deleted in the meantime.
            entry['username'] = sess.user.username if sess.user else 'Unknown'
            entry['nickname'] = sess.user.nickname if sess.user else None
            sessions_data.append(entry)
        return jsonify({
            'sessions': sessions_data,
            'total': page_obj.total,
            'pages': page_obj.pages,
            'current_page': page
        }), 200
    except Exception as e:
        return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def delete_user(user_id):
    """User deletion API; an admin may not delete their own account."""
    try:
        target = User.query.get_or_404(user_id)
        # Refuse self-deletion so an admin cannot lock themselves out.
        if user_id == current_user.id:
            return jsonify({'error': 'μκΈ° μμ μ μμ ν μ μμ΅λλ€.'}), 400
        db.session.delete(target)
        db.session.commit()
        return jsonify({'message': 'μ¬μ©μκ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'μ¬μ©μ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_gemini_api_key():
    """Report whether a Gemini API key is stored, exposing only a masked prefix."""
    try:
        # Falls back to an empty string when the config table/row is absent.
        api_key = SystemConfig.get_config('gemini_api_key', '')
        # Never return the full secret: at most the first 8 characters.
        if api_key and len(api_key) > 8:
            masked_key = api_key[:8] + '...'
        else:
            masked_key = ''
        return jsonify({
            'has_api_key': bool(api_key),
            'masked_key': masked_key
        }), 200
    except Exception as e:
        print(f"[Gemini API ν€ μ‘°ν] μ€λ₯: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν€ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def set_gemini_api_key():
    """Store or replace the Gemini API key, then hot-reload the client.

    Validates that the request carries a JSON body with a non-empty
    'api_key'. The client reload is best-effort and never fails the request.
    """
    try:
        if not request.is_json:
            return jsonify({'error': 'Content-Typeμ΄ application/jsonμ΄ μλλλ€.'}), 400
        data = request.json
        if not data:
            return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400
        api_key = data.get('api_key', '').strip()
        if not api_key:
            return jsonify({'error': 'API ν€λ₯Ό μ λ ₯ν΄μ£ΌμΈμ.'}), 400
        # SystemConfig.set_config creates the table/row on demand.
        SystemConfig.set_config(
            key='gemini_api_key',
            value=api_key,
            description='Google Gemini API ν€'
        )
        # Best-effort: tell the cached Gemini client to pick up the new key.
        try:
            from app.gemini_client import reset_gemini_client
            reset_gemini_client()
            print(f"[Gemini] API ν€κ° μ λ°μ΄νΈλμ΄ ν΄λΌμ΄μΈνΈκ° μ¬λ‘λλμμ΅λλ€.")
        except Exception as e:
            print(f"[Gemini] API ν€ μ¬λ‘λ μ€ν¨: {e}")
        success_payload = {
            'message': 'Gemini API ν€κ° μ±κ³΅μ μΌλ‘ μ μ₯λμμ΅λλ€.',
            'has_api_key': True
        }
        return jsonify(success_payload), 200
    except Exception as e:
        db.session.rollback()
        print(f"[Gemini API ν€ μ μ₯] μ€λ₯: {e}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'API ν€ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def _count_trained_webnovels(model_name):
    """Count root uploads (parent_file_id IS NULL) trained for model_name."""
    return UploadedFile.query.filter_by(
        model_name=model_name,
        parent_file_id=None
    ).count()


def get_ollama_models():
    """List Ollama and Gemini models that have at least one trained webnovel.

    Queries the Ollama /api/tags endpoint and the Gemini client, keeps only
    models with at least one trained root upload, and returns
    {'models': [{'name', 'type'}, ...]}. When neither source yields a model,
    responds 500 with an explanatory error. The duplicated trained-file
    count query was extracted into _count_trained_webnovels().
    """
    try:
        all_models = []
        # 1. Ollama models, filtered to those with trained webnovels.
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models_raw = [model['name'] for model in data.get('models', [])]
                filtered_ollama_models = []
                for model_name in ollama_models_raw:
                    file_count = _count_trained_webnovels(model_name)
                    if file_count > 0:
                        filtered_ollama_models.append({'name': model_name, 'type': 'ollama'})
                        print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ {file_count}κ°")
                    else:
                        print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ μμ, λͺ©λ‘μμ μ μΈ")
                all_models.extend(filtered_ollama_models)
                print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(filtered_ollama_models)}κ° μΆκ° (μ 체 {len(ollama_models_raw)}κ° μ€ {len(filtered_ollama_models)}κ° νν°λ§λ¨)")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}")
        # 2. Gemini models, same filtering; stored names carry a "gemini:" prefix.
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                filtered_gemini_models = []
                for model_name in gemini_models:
                    full_model_name = f'gemini:{model_name}'
                    file_count = _count_trained_webnovels(full_model_name)
                    if file_count > 0:
                        filtered_gemini_models.append({'name': full_model_name, 'type': 'gemini'})
                        print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ {file_count}κ°")
                    else:
                        print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ μμ, λͺ©λ‘μμ μ μΈ")
                all_models.extend(filtered_gemini_models)
                print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(filtered_gemini_models)}κ° μΆκ° (μ 체 {len(gemini_models)}κ° μ€ {len(filtered_gemini_models)}κ° νν°λ§λ¨)")
            else:
                print(f"[λͺ¨λΈ λͺ©λ‘] Gemini API ν€κ° μ€μ λμ§ μμ Gemini λͺ¨λΈμ λΆλ¬μ¬ μ μμ΅λλ€.")
        except Exception as e:
            print(f"[λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}")
        if all_models:
            return jsonify({'models': all_models})
        else:
            return jsonify({'error': 'μ¬μ© κ°λ₯ν λͺ¨λΈμ΄ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§, λλ Gemini API ν€κ° μ€μ λμλμ§ νμΈνμΈμ.', 'models': []}), 500
    except Exception as e:
        return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'models': []}), 500
def get_system_prompt():
    """Return the stored system prompt (empty string when unset)."""
    try:
        stored_prompt = SystemConfig.get_config('system_prompt', '')
        return jsonify({'prompt': stored_prompt}), 200
    except Exception as e:
        return jsonify({'error': f'ν둬ννΈλ₯Ό κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def save_system_prompt():
    """Persist the system prompt that is prepended to every question.

    Expects JSON {'prompt': str}; an empty prompt clears the setting.
    """
    try:
        data = request.json
        # Fix: request.json is None when the body is missing or not JSON;
        # previously data.get(...) raised AttributeError and surfaced as a
        # 500. Return a 400 instead (consistent with set_gemini_api_key).
        if data is None:
            return jsonify({'error': 'μμ² λ°μ΄ν°κ° μμ΅λλ€.'}), 400
        prompt = data.get('prompt', '').strip()
        SystemConfig.set_config(
            key='system_prompt',
            value=prompt,
            description='μ§λ¬Έν λ μλμΌλ‘ λΆμ΄λ μμ€ν ν둬ννΈ'
        )
        return jsonify({
            'message': 'ν둬ννΈκ° μ±κ³΅μ μΌλ‘ μ μ₯λμμ΅λλ€.',
            'prompt': prompt
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'ν둬ννΈ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_all_ollama_models():
    """Admin endpoint: list every available Ollama and Gemini model.

    Unlike get_ollama_models(), models without trained webnovels are NOT
    filtered out; each entry instead carries an informational 'file_count'
    of root uploads (parent_file_id IS NULL) trained for that model.
    """
    try:
        all_models = []
        # 1. Fetch the full Ollama model list (no filtering).
        try:
            response = requests.get(f'{OLLAMA_BASE_URL}/api/tags', timeout=5)
            if response.status_code == 200:
                data = response.json()
                ollama_models_raw = [model['name'] for model in data.get('models', [])]
                # Add every Ollama model, regardless of training data.
                for model_name in ollama_models_raw:
                    # Count trained webnovels for display purposes only.
                    file_count = UploadedFile.query.filter_by(
                        model_name=model_name,
                        parent_file_id=None
                    ).count()
                    all_models.append({
                        'name': model_name,
                        'type': 'ollama',
                        'file_count': file_count  # informational only
                    })
                    print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ '{model_name}' - νμ΅λ μΉμμ€ {file_count}κ°")
                print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ {len(ollama_models_raw)}κ° μΆκ°")
        except Exception as e:
            print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Ollama λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}")
        # 2. Fetch the full Gemini model list (no filtering).
        try:
            gemini_client = get_gemini_client()
            if gemini_client.is_configured():
                gemini_models = gemini_client.get_available_models()
                # Add every Gemini model; stored names use a "gemini:" prefix.
                for model_name in gemini_models:
                    full_model_name = f'gemini:{model_name}'
                    # Count trained webnovels for display purposes only.
                    file_count = UploadedFile.query.filter_by(
                        model_name=full_model_name,
                        parent_file_id=None
                    ).count()
                    all_models.append({
                        'name': full_model_name,
                        'type': 'gemini',
                        'file_count': file_count  # informational only
                    })
                    print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ '{full_model_name}' - νμ΅λ μΉμμ€ {file_count}κ°")
                print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ {len(gemini_models)}κ° μΆκ°")
            else:
                print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini API ν€κ° μ€μ λμ§ μμ Gemini λͺ¨λΈμ λΆλ¬μ¬ μ μμ΅λλ€.")
        except Exception as e:
            print(f"[κ΄λ¦¬μ λͺ¨λΈ λͺ©λ‘] Gemini λͺ¨λΈ λͺ©λ‘ μ‘°ν μ€ν¨: {e}")
        if all_models:
            return jsonify({'models': all_models})
        else:
            return jsonify({'error': 'μ¬μ© κ°λ₯ν λͺ¨λΈμ΄ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§, λλ Gemini API ν€κ° μ€μ λμλμ§ νμΈνμΈμ.', 'models': []}), 500
    except Exception as e:
        return jsonify({'error': f'λͺ¨λΈ λͺ©λ‘μ κ°μ Έμ€λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}', 'models': []}), 500
def chat():
    """Chat API endpoint.

    Expects JSON {'message': str, 'model': str?, 'file_ids': [int]?,
    'session_id': int?}. When a model is selected: builds a RAG context
    (parent-chunk overview + reranked child chunks, or a raw-file fallback),
    prepends the stored system prompt, calls Gemini or Ollama, persists the
    exchange to the chat session (if one was given), and returns
    {'response': ..., 'session_id': ..., 'session': ...?}. Without a model
    it returns a canned placeholder response.
    """
    try:
        data = request.json
        message = data.get('message', '')
        model = data.get('model', '')
        file_ids = [int(fid) for fid in data.get('file_ids', []) if fid]  # selected webnovel file IDs, coerced to int
        session_id = data.get('session_id', None)  # chat session ID
        if not message:
            return jsonify({'error': 'λ©μμ§κ° νμν©λλ€.'}), 400
        # A selected model routes the request to Ollama/Gemini with RAG.
        if model:
            try:
                # RAG: retrieve chunks relevant to the question.
                context = ""
                use_rag = True  # whether RAG is attempted
                if use_rag:
                    print(f"\n[RAG κ²μ] λͺ¨λΈ: {model}, μ§λ¬Έ: {message[:50]}...")
                    print(f"[RAG κ²μ] μ νλ νμΌ ID: {file_ids if file_ids else 'μμ (λͺ¨λ νμΌ κ²μ)'}")
                    # Stage 1: parent chunks provide whole-novel context.
                    parent_chunks = []
                    if file_ids:
                        print(f"[RAG κ²μ 1λ¨κ³] Parent Chunk μ‘°ν μμ...")
                        parent_chunks = get_parent_chunks_for_files(file_ids)
                        print(f"[RAG κ²μ 1λ¨κ³] Parent Chunk μ‘°ν μλ£: {len(parent_chunks)}κ° νμΌ")
                    # Stage 2: precise child-chunk retrieval
                    # (vector search followed by cross-encoder reranking).
                    print(f"[RAG κ²μ 2λ¨κ³] λ²‘ν° κ²μ + 리λνΉ μμ...")
                    relevant_chunks = search_relevant_chunks(
                        query=message,
                        file_ids=file_ids if file_ids else None,
                        model_name=model,
                        top_k=5,  # keep only the top 5 after reranking
                        min_score=0.5  # minimum relevance score threshold
                    )
                    print(f"[RAG κ²μ 2λ¨κ³] λ²‘ν° κ²μ + 리λνΉ μλ£: {len(relevant_chunks)}κ° μ²ν¬ (μμ 5κ°)")
                    # Assemble the context that will be fed to the LLM.
                    context_parts = []
                    # Parent-chunk overview section (world view, characters, ...).
                    if parent_chunks:
                        parent_context_sections = []
                        for parent_chunk in parent_chunks:
                            file = parent_chunk.file
                            file_info = f"\n=== {file.original_filename} μ 체 κ°μ ===\n"
                            sections = []
                            if parent_chunk.world_view:
                                sections.append(f"[μΈκ³κ΄]\n{parent_chunk.world_view}")
                            if parent_chunk.characters:
                                sections.append(f"[μ£Όμ μΊλ¦ν°]\n{parent_chunk.characters}")
                            if parent_chunk.story:
                                sections.append(f"[μ£Όμ μ€ν 리]\n{parent_chunk.story}")
                            if parent_chunk.episodes:
                                sections.append(f"[μ£Όμ μνΌμλ]\n{parent_chunk.episodes}")
                            if parent_chunk.others:
                                sections.append(f"[κΈ°ν μ 보]\n{parent_chunk.others}")
                            if sections:
                                parent_context_sections.append(file_info + "\n\n".join(sections))
                        if parent_context_sections:
                            parent_context = "\n\n".join(parent_context_sections)
                            context_parts.append(f"λ€μμ μΉμμ€μ μ 체μ μΈ λ¬Έλ§₯κ³Ό κ°μμ λλ€:\n\n{parent_context}")
                            print(f"[RAG κ²μ] Parent Chunk 컨ν μ€νΈ μΆκ°: {len(parent_context)}μ")
                    # Child-chunk section (the precise retrieval results).
                    if relevant_chunks:
                        child_context_parts = []
                        seen_files = set()
                        for chunk in relevant_chunks:
                            file = chunk.file
                            # Log each source file only once.
                            if file.original_filename not in seen_files:
                                seen_files.add(file.original_filename)
                                print(f"[RAG κ²μ] μ¬μ©λ νμΌ: {file.original_filename} (λͺ¨λΈ: {file.model_name})")
                            child_context_parts.append(f"[{file.original_filename} - μ²ν¬ {chunk.chunk_index + 1}]\n{chunk.content}")
                        if child_context_parts:
                            # Check the combined length and trim if needed.
                            full_child_context = "\n\n".join(child_context_parts)
                            child_context_length = len(full_child_context)
                            # Cap the child-chunk context at 15000 characters,
                            # dropping whole chunks from the end.
                            if child_context_length > 15000:
                                truncated_parts = []
                                current_length = 0
                                for part in child_context_parts:
                                    if current_length + len(part) > 15000:
                                        break
                                    truncated_parts.append(part)
                                    current_length += len(part)
                                full_child_context = "\n\n".join(truncated_parts)
                                print(f"[RAG κ²μ] Child Chunk 컨ν μ€νΈ κΈΈμ΄ μ‘°μ : {child_context_length}μ β {len(full_child_context)}μ")
                            context_parts.append(f"λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μΉμμ€μ ꡬ체μ μΈ λ΄μ©μ λλ€ (μ λ° κ²μ κ²°κ³Ό, μ΄ {len(relevant_chunks)}κ° μ²ν¬):\n\n{full_child_context}")
                            print(f"[RAG κ²μ] Child Chunk 컨ν μ€νΈ μΆκ°: {len(full_child_context)}μ")
                    # Build the final prompt context from the collected parts.
                    if context_parts:
                        full_context = "\n\n" + "\n\n---\n\n".join(context_parts) + "\n\n"
                        # Both parent and child chunks are available.
                        if parent_chunks and relevant_chunks:
                            context = f"""λ€μμ μ§λ¬Έμ λ΅νκΈ° μν μΉμμ€ μ 보μ λλ€:
{full_context}
μ μ 보λ₯Ό μ°Έκ³ νμ¬ λ΅λ³ν΄μ£ΌμΈμ:
- λ¨Όμ μ 체μ μΈ λ¬Έλ§₯(Parent Chunk)μ μ΄ν΄νμ¬ μΉμμ€μ λ°°κ²½κ³Ό μ€μ μ νμ νμΈμ.
- κ·Έ λ€μ ꡬ체μ μΈ λ΄μ©(Child Chunk)μ ν΅ν΄ μ§λ¬Έμ λν μ νν λ΅λ³μ μ 곡νμΈμ.
- μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ μΌκ΄μ± μλ λ΅λ³μ μμ±νμΈμ.
μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€.
λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ.
κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ.
μ§λ¬Έ:
"""
                        elif parent_chunks:
                            # Only parent chunks are available.
                            context = f"""λ€μμ μΉμμ€μ μ 체μ μΈ λ¬Έλ§₯κ³Ό κ°μμ λλ€:
{full_context}
μ μ 보λ₯Ό μ°Έκ³ νμ¬ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ. μΉμμ€μ λ°°κ²½κ³Ό μ€μ μ κ³ λ €νμ¬ λ΅λ³νμΈμ.
μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€.
λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ.
κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ.
μ§λ¬Έ:
"""
                        else:
                            # Only child chunks are available.
                            context = f"""λ€μμ μ§λ¬Έκ³Ό κ΄λ ¨λ μΉμμ€μ ꡬ체μ μΈ λ΄μ©μ λλ€:
{full_context}
μ λ΄μ©μ μΆ©λΆν μ°Έκ³ νμ¬ λ€μ μ§λ¬Έμ μ ννκ³ μμΈνκ² λ΅λ³ν΄μ£ΌμΈμ. μΉμμ€μ λ§₯λ½κ³Ό μ€ν 리λ₯Ό κ³ λ €νμ¬ λ΅λ³ν΄μ£ΌμΈμ.
μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€.
λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ.
κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ.
μ§λ¬Έ:
"""
                        # NOTE(review): the question is appended here AND again
                        # via prompt_parts below, so it appears twice in the
                        # final prompt — confirm this duplication is intended.
                        context += message
                        print(f"[RAG κ²μ] μ΅μ’ 컨ν μ€νΈ μμ± μλ£ (Parent Chunk: {len(parent_chunks)}κ°, Child Chunk: {len(relevant_chunks)}κ°, μ΄ {len(context)}μ)")
                    else:
                        # No RAG results: fall back to the legacy whole-file path.
                        print(f"[RAG κ²μ] κ΄λ ¨ μ²ν¬λ₯Ό μ°Ύμ§ λͺ»νμ΅λλ€. μ 체 νμΌ λ΄μ© μ¬μ©")
                        use_rag = False
                # Legacy path: no RAG context, so inline raw file contents.
                if not context and not use_rag:
                    if file_ids:
                        # Expand the selected file IDs with continuation uploads.
                        expanded_file_ids = list(file_ids)
                        for file_id in file_ids:
                            # Include files uploaded as continuations of this one.
                            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
                            expanded_file_ids.extend([child.id for child in child_files])
                        uploaded_files = UploadedFile.query.filter(
                            UploadedFile.id.in_(expanded_file_ids),
                            UploadedFile.model_name == model
                        ).all()
                        print(f"[νμΌ μ¬μ©] μ νλ νμΌ IDλ‘ μ‘°ν (μ΄μ΄μ μ λ‘λ ν¬ν¨): {len(uploaded_files)}κ° νμΌ")
                    else:
                        # No file IDs: use every file trained for this model
                        # (roots and continuation uploads alike).
                        uploaded_files = UploadedFile.query.filter_by(model_name=model).all()
                        print(f"[νμΌ μ¬μ©] λͺ¨λΈ '{model}'μ λͺ¨λ νμΌ μ¬μ©: {len(uploaded_files)}κ° νμΌ")
                    if uploaded_files:
                        print(f"[νμΌ μ¬μ©] μ¬μ©λλ νμΌ λͺ©λ‘:")
                        for f in uploaded_files:
                            is_child = f.parent_file_id is not None
                            prefix = " ββ " if is_child else " - "
                            print(f"{prefix}{f.original_filename} (λͺ¨λΈ: {f.model_name})")
                    context_parts = []
                    for file in uploaded_files:
                        try:
                            if os.path.exists(file.file_path):
                                encoding = 'utf-8'
                                # Try UTF-8 first, then fall back to CP949
                                # (legacy Korean Windows encoding).
                                try:
                                    with open(file.file_path, 'r', encoding=encoding) as f:
                                        file_content = f.read()
                                except UnicodeDecodeError:
                                    with open(file.file_path, 'r', encoding='cp949') as f:
                                        file_content = f.read()
                                # Truncate very long files (cap: 20000 chars).
                                if len(file_content) > 20000:
                                    file_content = file_content[:20000] + "..."
                                context_parts.append(f"[{file.original_filename}]\n{file_content}")
                        except Exception as e:
                            print(f"νμΌ μ½κΈ° μ€λ₯ ({file.original_filename}): {str(e)}")
                            continue
                    if context_parts:
                        context = "\n\n".join(context_parts)
                        context = f"""λ€μμ νμ΅λ μΉμμ€ λ΄μ©μ λλ€:
{context}
μ λ΄μ©μ μ°Έκ³ νμ¬ λ€μ μ§λ¬Έμ λ΅λ³ν΄μ£ΌμΈμ.
μ€μ: μ§λ¬Έμ λ΅λ³ν λλ λ°λμ μ 곡λ [μμ€ λ³Έλ¬Έ] λ΄μ λ΄μ©μ κ·Όκ±°λ‘ ν΄μΌ ν©λλ€.
λ΅λ³μ κ° λ¬Έμ₯ λμλ μ°Έκ³ ν λ³Έλ¬Έμ λ¬Έμ₯μ [κ·Όκ±°: "λ¬Έμ₯ λ΄μ©..."] νμμΌλ‘ λ°λμ λΆμ΄μΈμ.
κ·Όκ±°λ₯Ό μ°Ύμ μ μλ€λ©΄ "λ΄μ©μ μ°Ύμ μ μμ΅λλ€"λΌκ³ λ΅νκ³ μ§μ΄λ΄μ§ λ§μΈμ.
μ§λ¬Έ:
"""
                # Fetch the stored system prompt (empty string when unset).
                system_prompt = SystemConfig.get_config('system_prompt', '').strip()
                # Final prompt = system prompt + context + user message.
                prompt_parts = []
                if system_prompt:
                    prompt_parts.append(system_prompt)
                if context:
                    prompt_parts.append(context)
                prompt_parts.append(message)
                full_prompt = "\n\n".join(prompt_parts)
                if system_prompt:
                    print(f"[ν둬ννΈ] μμ€ν ν둬ννΈ μ μ©: {len(system_prompt)}μ")
                # Route by model type: "gemini:" prefix selects the Gemini API.
                is_gemini = model.startswith('gemini:')
                if is_gemini:
                    # Gemini API call.
                    gemini_model_name = model.replace('gemini:', '')
                    print(f"[Gemini] λͺ¨λΈ: {gemini_model_name}, μ§λ¬Έ: {message[:50]}...")
                    gemini_client = get_gemini_client()
                    if not gemini_client.is_configured():
                        return jsonify({'error': 'Gemini API ν€κ° μ€μ λμ§ μμμ΅λλ€. GEMINI_API_KEY νκ²½ λ³μλ₯Ό μ€μ νμΈμ.'}), 500
                    result = gemini_client.generate_response(
                        prompt=full_prompt,
                        model_name=gemini_model_name,
                        temperature=0.7,
                        max_output_tokens=8192
                    )
                    if result['error']:
                        return jsonify({'error': result['error']}), 500
                    response_text = result['response']
                else:
                    # Ollama API call.
                    ollama_response = requests.post(
                        f'{OLLAMA_BASE_URL}/api/generate',
                        json={
                            'model': model,
                            'prompt': full_prompt,
                            'stream': False
                        },
                        timeout=120  # large contexts can be slow to generate
                    )
                    if ollama_response.status_code != 200:
                        # Extract error detail from the response body.
                        # NOTE(review): bare except intentionally tolerates any
                        # body-parsing failure here.
                        try:
                            error_detail = ollama_response.json().get('error', ollama_response.text[:200])
                        except:
                            error_detail = ollama_response.text[:200] if ollama_response.text else 'μμΈ μ 보 μμ'
                        if ollama_response.status_code == 404:
                            error_msg = f'λͺ¨λΈ "{model}"μ(λ₯Ό) μ°Ύμ μ μμ΅λλ€. λͺ¨λΈμ΄ Ollamaμ μ€μΉλμ΄ μλμ§ νμΈνμΈμ. (μ€λ₯: {error_detail})'
                        else:
                            error_msg = f'Ollama μλ² μ€λ₯: {ollama_response.status_code} (μ€λ₯: {error_detail})'
                        return jsonify({'error': error_msg}), ollama_response.status_code
                    ollama_data = ollama_response.json()
                    response_text = ollama_data.get('response', 'μλ΅μ μμ±ν μ μμ΅λλ€.')
                # Persist the exchange to the chat session (Gemini and Ollama).
                # NOTE(review): session_id was already read at the top of the
                # function; this re-read is redundant but harmless.
                session_id = data.get('session_id')
                session_dict = None
                if session_id:
                    try:
                        # Only touch sessions owned by the current user.
                        session = ChatSession.query.filter_by(
                            id=session_id,
                            user_id=current_user.id
                        ).first()
                        if session:
                            # Duplicate-save guard: check the most recent user
                            # message before storing this one.
                            latest_user_msg = ChatMessage.query.filter_by(
                                session_id=session_id,
                                role='user'
                            ).order_by(ChatMessage.created_at.desc()).first()
                            # Skip saving if the same content was stored within
                            # the last 10 seconds.
                            should_save = True
                            if latest_user_msg:
                                time_diff = (datetime.utcnow() - latest_user_msg.created_at).total_seconds()
                                if latest_user_msg.content == message and time_diff < 10:
                                    should_save = False
                                    print(f"[μ€λ³΅ λ°©μ§] μ΅κ·Ό {time_diff:.2f}μ΄ μ μ κ°μ λ©μμ§κ° μ μ₯λμ΄ μμ΅λλ€. μ μ₯μ 건λλλλ€.")
                            if should_save:
                                user_msg = ChatMessage(
                                    session_id=session_id,
                                    role='user',
                                    content=message
                                )
                                db.session.add(user_msg)
                                print(f"[λ©μμ§ μ μ₯] μ¬μ©μ λ©μμ§ μ μ₯: {message[:50]}...")
                                # Refresh the session title when it is still the
                                # default (first user message of the session).
                                title_needs_update = (
                                    not session.title or
                                    session.title.strip() == '' or
                                    session.title == 'μ λν'
                                )
                                if title_needs_update and message.strip():
                                    # Use the message itself as the title (max 30 chars).
                                    title = message.strip()[:30]
                                    if len(message.strip()) > 30:
                                        title += '...'
                                    session.title = title
                                    print(f"[μΈμ μ λͺ©] μ λ°μ΄νΈ: '{title}' (μλ³Έ κΈΈμ΄: {len(message.strip())}μ)")
                                elif title_needs_update:
                                    print(f"[μΈμ μ λͺ©] λ©μμ§κ° λΉμ΄μμ΄ μ λͺ©μ μ λ°μ΄νΈνμ§ μμ΅λλ€.")
                            else:
                                print(f"[λ©μμ§ μ μ₯] μ€λ³΅ λ©μμ§λ‘ μΈν΄ μ μ₯μ 건λλλλ€.")
                            # Always store the AI response.
                            ai_msg = ChatMessage(
                                session_id=session_id,
                                role='ai',
                                content=response_text
                            )
                            db.session.add(ai_msg)
                            session.updated_at = datetime.utcnow()
                            db.session.commit()
                            # Include the (possibly retitled) session in the response.
                            session_dict = session.to_dict()
                    except Exception as e:
                        # Persisting the exchange is best-effort; the chat
                        # response is still returned on failure.
                        print(f"λ©μμ§ μ μ₯ μ€λ₯: {str(e)}")
                        db.session.rollback()
                        session_dict = None
                response_data = {'response': response_text, 'session_id': session_id}
                if session_dict:
                    response_data['session'] = session_dict
                return jsonify(response_data)
            except requests.exceptions.ConnectionError:
                return jsonify({'error': 'Ollama μλ²μ μ°κ²°ν μ μμ΅λλ€. Ollamaκ° μ€ν μ€μΈμ§ νμΈνμΈμ.'}), 503
            except requests.exceptions.Timeout:
                return jsonify({'error': 'μλ΅ μκ°μ΄ μ΄κ³Όλμμ΅λλ€. λ μ§§μ λ©μμ§λ₯Ό μλν΄λ³΄μΈμ.'}), 504
            except Exception as e:
                return jsonify({'error': f'Ollama ν΅μ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
        else:
            # No model selected: return a canned placeholder response.
            response_text = f"μλ νμΈμ! '{message}'μ λν λ΅λ³μ μ€λΉ μ€μ λλ€.\n\nμ’μΈ‘ νλ¨μμ λ‘컬 AI λͺ¨λΈμ μ ννλ©΄ λ μ νν λ΅λ³μ μ 곡ν μ μμ΅λλ€."
            return jsonify({'response': response_text})
    except Exception as e:
        return jsonify({'error': f'μ±ν μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def upload_file():
    """Handle a web-novel file upload (multipart/form-data).

    Expects a 'file' part, a 'model_name' form field, and optionally a
    'parent_file_id' field when the upload continues an existing file.
    The file is stored under a UUID-prefixed sanitized name, recorded in
    the database, and — for .txt/.md files — split into Child Chunks and
    summarized into a Parent Chunk for RAG retrieval.

    Returns:
        200 with {'message', 'file', 'model_name', 'chunk_count'} on
        success; otherwise an error payload carrying a 'step' marker
        identifying the failed validation/persistence stage.
    """
    import sys
    import traceback
    # Timestamped print that flushes immediately so logs appear live.
    def log_print(*args, **kwargs):
        from datetime import datetime
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
        print(f"[{timestamp}]", *args, **kwargs)
        sys.stdout.flush()
    try:
        log_print(f"\n{'='*60}")
        log_print(f"=== νμΌ μ λ‘λ μμ² μμ ===")
        log_print(f"μμ² URL: {request.url}")
        log_print(f"μμ² λ©μλ: {request.method}")
        log_print(f"Content-Type: {request.content_type}")
        log_print(f"Content-Length: {request.content_length}")
        log_print(f"Remote Address: {request.remote_addr}")
        log_print(f"Headers: {dict(request.headers)}")
        log_print(f"Form λ°μ΄ν° ν€: {list(request.form.keys())}")
        log_print(f"Files ν€: {list(request.files.keys())}")
        log_print(f"μ¬μ©μ: {current_user.username if current_user and current_user.is_authenticated else 'None'}")
        log_print(f"μ¬μ©μ μΈμ¦ μν: {current_user.is_authenticated if current_user else False}")
        log_print(f"{'='*60}\n")
        # Make sure the upload folder exists before accepting the file.
        try:
            ensure_upload_folder()
            log_print(f"[1/8] μ λ‘λ ν΄λ νμΈ μλ£: {UPLOAD_FOLDER}")
        except Exception as e:
            error_msg = f'μ λ‘λ ν΄λλ₯Ό μ€λΉν μ μμ΅λλ€: {str(e)}'
            log_print(f"[ERROR] {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'folder_check'}), 500
        if 'file' not in request.files:
            error_msg = 'νμΌμ΄ μμ΅λλ€.'
            log_print(f"[ERROR] {error_msg}")
            log_print(f"μ¬μ© κ°λ₯ν ν€: {list(request.files.keys())}")
            return jsonify({'error': error_msg, 'step': 'file_check'}), 400
        file = request.files['file']
        model_name = request.form.get('model_name', '').strip()
        parent_file_id = request.form.get('parent_file_id', None)  # set when continuing an existing upload
        log_print(f"[2/8] νμΌ μμ : {file.filename if file else 'None'}")
        log_print(f"[2/8] λͺ¨λΈλͺ : {model_name if model_name else 'None (λΉμ΄μμ)'}")
        log_print(f"[2/8] μ΄μ΄μ μ λ‘λ: {parent_file_id if parent_file_id else 'μλμ€'}")
        if file.filename == '':
            error_msg = 'νμΌλͺ μ΄ μμ΅λλ€.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'filename_check'}), 400
        # A target AI model is mandatory.
        if not model_name:
            error_msg = 'AI λͺ¨λΈμ μ νν΄μ£ΌμΈμ.'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'model_check'}), 400
        # Validate parent_file_id when this is a "continue" upload.
        parent_file = None
        if parent_file_id:
            raw_parent_file_id = parent_file_id  # keep the raw value for error logging
            try:
                parent_file_id = int(parent_file_id)
                parent_file = UploadedFile.query.filter_by(
                    id=parent_file_id,
                    uploaded_by=current_user.id
                ).first()
                if not parent_file:
                    error_msg = 'μλ³Έ νμΌμ μ°Ύμ μ μμ΅λλ€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'parent_file_check'}), 404
                # A continuation must target the same model as its parent.
                if parent_file.model_name != model_name:
                    error_msg = 'κ°μ λͺ¨λΈμ νμΌμλ§ μ΄μ΄μ μ λ‘λν μ μμ΅λλ€.'
                    log_print(f"[ERROR] {error_msg}")
                    return jsonify({'error': error_msg, 'step': 'model_mismatch'}), 400
                log_print(f"[μ΄μ΄μ μ λ‘λ] μλ³Έ νμΌ: {parent_file.original_filename} (ID: {parent_file_id})")
            except (ValueError, TypeError):
                # BUGFIX: log the offending raw value (previously the variable
                # was reset to None before being logged).
                parent_file_id = None
                log_print(f"[κ²½κ³ ] μλͺ»λ parent_file_id: {raw_parent_file_id}")
        log_print(f"[3/8] μ λ‘λ μλ: {file.filename}, λͺ¨λΈ: {model_name}")
        if not allowed_file(file.filename):
            error_msg = f'νμ©λμ§ μμ νμΌ νμμ λλ€. νμ© νμ: {", ".join(ALLOWED_EXTENSIONS)}'
            log_print(f"[ERROR] {error_msg}")
            return jsonify({'error': error_msg, 'step': 'file_type_check'}), 400
        log_print(f"[4/8] νμΌ νμ νμΈ μλ£: {file.filename}")
        # Determine the incoming size: prefer Content-Length, else seek the
        # stream; 0 means "unknown" and is re-verified after saving.
        file_size = 0
        try:
            if request.content_length:
                file_size = request.content_length
                print(f"Content-Lengthλ‘ νμΌ ν¬κΈ° νμΈ: {file_size} bytes")
            else:
                try:
                    current_pos = file.tell()
                    file.seek(0, os.SEEK_END)
                    file_size = file.tell()
                    file.seek(current_pos, os.SEEK_SET)
                    print(f"νμΌ μ€νΈλ¦ΌμΌλ‘ ν¬κΈ° νμΈ: {file_size} bytes")
                except (AttributeError, IOError, OSError) as e:
                    print(f"νμΌ ν¬κΈ° νμΈ μ€ν¨ (μ μ₯ ν νμΈ): {str(e)}")
                    file_size = 0  # verify after saving instead
        except Exception as e:
            print(f"νμΌ ν¬κΈ° νμΈ μ€λ₯: {str(e)}")
            file_size = 0  # verify after saving instead
        # Pre-check the limit only when the size is actually known.
        # BUGFIX: the old 'file_size == 0' branch here was nested inside the
        # '> 0' guard and unreachable; genuinely empty files are rejected by
        # the post-save saved_file_size check below.
        if file_size > 0:
            if file_size > 100 * 1024 * 1024:  # 100MB cap
                print(f"νμΌ ν¬κΈ° μ΄κ³Ό: {file_size} bytes")
                return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.'}), 400
        # Build a collision-free stored name from a sanitized filename.
        original_filename = file.filename
        filename = secure_filename(original_filename)
        if not filename:
            return jsonify({'error': 'μ ν¨νμ§ μμ νμΌλͺ μ λλ€.'}), 400
        # BUGFIX: the sanitized filename was computed but never used — the
        # stored name previously embedded a literal placeholder string.
        unique_filename = f"{uuid.uuid4().hex}_{filename}"
        file_path = os.path.join(UPLOAD_FOLDER, unique_filename)
        # Persist to disk.
        try:
            log_print(f"[6/8] νμΌ μ μ₯ μλ: {file_path}")
            file.save(file_path)
            log_print(f"[6/8] νμΌ μ μ₯ μλ£: {file_path}")
        except IOError as e:
            error_msg = f'νμΌ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'
            log_print(f"[ERROR] νμΌ μ μ₯ IOError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500
        except PermissionError as e:
            error_msg = f'νμΌ μ μ₯ κΆν μ€λ₯: {str(e)}'
            log_print(f"[ERROR] νμΌ μ μ₯ PermissionError: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save_permission'}), 500
        except Exception as e:
            error_msg = f'νμΌ μ μ₯ μ€ν¨: {str(e)}'
            log_print(f"[ERROR] νμΌ μ μ₯ Exception: {error_msg}")
            traceback.print_exc()
            return jsonify({'error': error_msg, 'step': 'file_save'}), 500
        # Verify the saved file exists and is non-empty.
        if not os.path.exists(file_path):
            error_msg = 'νμΌμ΄ μ μ₯λμ§ μμμ΅λλ€.'
            print(f"νμΌ μ‘΄μ¬ νμΈ μ€ν¨: {file_path}")
            return jsonify({'error': error_msg}), 500
        saved_file_size = os.path.getsize(file_path)
        if saved_file_size == 0:
            os.remove(file_path)  # discard the empty file
            error_msg = 'νμΌμ΄ μ λλ‘ μ μ₯λμ§ μμμ΅λλ€.'
            print(f"λΉ νμΌ μμ : {file_path}")
            return jsonify({'error': error_msg}), 500
        print(f"μ μ₯λ νμΌ ν¬κΈ°: {saved_file_size} bytes")
        # Record in the database; text files are additionally chunked.
        try:
            log_print(f"[7/8] λ°μ΄ν°λ² μ΄μ€ μ μ₯ μλ: {original_filename}")
            uploaded_file = UploadedFile(
                filename=unique_filename,
                original_filename=original_filename,
                file_path=file_path,
                file_size=saved_file_size,
                model_name=model_name,  # validated above
                uploaded_by=current_user.id,
                parent_file_id=parent_file_id if parent_file else None  # continuation link
            )
            db.session.add(uploaded_file)
            db.session.flush()  # flush to obtain the new row's ID
            log_print(f"[7/8] λ°μ΄ν°λ² μ΄μ€ flush μλ£, νμΌ ID: {uploaded_file.id}")
            if original_filename.lower().endswith(('.txt', '.md')):
                try:
                    log_print(f"[7/8] μ²ν¬ μμ± μμ: {original_filename}")
                    log_print(f"[7/8] νμΌ ID: {uploaded_file.id}")
                    # Read the content: UTF-8 first, legacy CP949 fallback.
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                        log_print(f"[7/8] UTF-8 μΈμ½λ©μΌλ‘ νμΌ μ½κΈ° μ±κ³΅: {len(content)}μ")
                    except UnicodeDecodeError:
                        log_print(f"[7/8] UTF-8 μΈμ½λ© μ€ν¨, CP949 μλ: {original_filename}")
                        with open(file_path, 'r', encoding='cp949') as f:
                            content = f.read()
                        log_print(f"[7/8] CP949 μΈμ½λ©μΌλ‘ νμΌ μ½κΈ° μ±κ³΅: {len(content)}μ")
                    # 1) Parent Chunk (AI analysis) — best effort, never aborts the upload.
                    try:
                        log_print(f"[7/8] Parent Chunk μμ± μμ (AI λΆμ)...")
                        parent_chunk = create_parent_chunk_with_ai(uploaded_file.id, content, model_name)
                        if parent_chunk:
                            log_print(f"[7/8] β Parent Chunk μμ± μλ£: {original_filename}")
                            print(f"Parent Chunkκ° μμ±λμμ΅λλ€: {original_filename}")
                        else:
                            log_print(f"[7/8] β οΈ κ²½κ³ : Parent Chunk μμ± μ€ν¨: {original_filename}")
                            print(f"κ²½κ³ : Parent Chunk μμ±μ μ€ν¨νμ΅λλ€: {original_filename}")
                    except Exception as parent_chunk_error:
                        log_print(f"[7/8] β οΈ κ²½κ³ : Parent Chunk μμ± μ€ μμΈ λ°μ: {str(parent_chunk_error)}")
                        print(f"κ²½κ³ : Parent Chunk μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {original_filename}")
                        traceback.print_exc()
                    # 2) Child Chunks (section split) used by RAG retrieval.
                    log_print(f"[8/8] Child Chunk μμ± ν¨μ νΈμΆ μ€...")
                    chunk_count = create_chunks_for_file(uploaded_file.id, content)
                    if chunk_count > 0:
                        log_print(f"[8/8] β μ±κ³΅: νμΌ {original_filename}μ {chunk_count}κ°μ μ²ν¬λ‘ λΆν νμ΅λλ€.")
                        print(f"νμΌ {original_filename}μ {chunk_count}κ°μ μ²ν¬λ‘ λΆν νμ΅λλ€.")
                    else:
                        log_print(f"[8/8] β οΈ κ²½κ³ : μ²ν¬κ° μμ±λμ§ μμμ΅λλ€. (νμΌμ΄ λ무 μ§§κ±°λ λΉμ΄μμ μ μμ΅λλ€.)")
                        print(f"κ²½κ³ : νμΌ {original_filename}μ λν μ²ν¬κ° μμ±λμ§ μμμ΅λλ€.")
                except Exception as e:
                    # Chunking failure does not abort the upload either.
                    error_msg = f"μ²ν¬ μμ± μ€ μ€λ₯: {str(e)}"
                    log_print(f"[7/8] β μ€λ₯: {error_msg}")
                    print(error_msg)
                    traceback.print_exc()
                    log_print(f"[7/8] β οΈ κ²½κ³ : μ²ν¬ μμ± μ€ν¨νμ§λ§ νμΌ μ λ‘λλ κ³μ μ§νν©λλ€.")
            # Final chunk count for the response payload.
            chunk_count = 0
            if original_filename.lower().endswith(('.txt', '.md')):
                chunk_count = DocumentChunk.query.filter_by(file_id=uploaded_file.id).count()
                log_print(f"[8/8] μ΅μ’ μ²ν¬ κ°μ νμΈ: {chunk_count}κ°")
            db.session.commit()
            log_print(f"[8/8] λ°μ΄ν°λ² μ΄μ€ μ»€λ° μλ£: {original_filename}")
            log_print(f"[8/8] μ°κ²°λ λͺ¨λΈ: {model_name}")
            log_print(f"[8/8] μμ±λ μ²ν¬ μ: {chunk_count}")
            if chunk_count > 0:
                log_print(f"[8/8] β AI νμ΅ μ€λΉ μλ£: {chunk_count}κ° μ²ν¬κ° μ μ₯λμ΄ RAG κ²μμ μ¬μ© κ°λ₯ν©λλ€.")
            else:
                log_print(f"[8/8] β οΈ κ²½κ³ : μ²ν¬κ° μμ±λμ§ μμ RAG κ²μμ΄ λΆκ°λ₯ν©λλ€.")
            log_print(f"{'='*60}")
            log_print(f"=== νμΌ μ λ‘λ μ±κ³΅ ===")
            log_print(f"{'='*60}\n")
            log_print(f"[8/8] μ λ‘λ μλ£ - νμΌ: {original_filename}, λͺ¨λΈ: {model_name}, ν¬κΈ°: {saved_file_size} bytes")
            return jsonify({
                'message': f'νμΌμ΄ μ±κ³΅μ μΌλ‘ μ λ‘λλμμ΅λλ€. (λͺ¨λΈ: {model_name})',
                'file': uploaded_file.to_dict(),
                'model_name': model_name,
                'chunk_count': chunk_count
            }), 200
        except Exception as e:
            db.session.rollback()
            error_msg = f'λ°μ΄ν°λ² μ΄μ€ μ μ₯ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'
            log_print(f"[ERROR] λ°μ΄ν°λ² μ΄μ€ μ μ₯ μ€λ₯: {error_msg}")
            traceback.print_exc()
            # Remove the orphaned on-disk file when the DB record failed.
            if 'file_path' in locals() and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    log_print(f"μ€λ₯λ‘ μΈν νμΌ μμ : {file_path}")
                except Exception as del_e:
                    log_print(f"νμΌ μμ μ€ν¨: {str(del_e)}")
            return jsonify({'error': error_msg, 'step': 'database_save'}), 500
    except Exception as e:
        db.session.rollback()
        error_msg = str(e)
        error_type = type(e).__name__
        log_print(f"\n{'='*60}")
        log_print(f"=== μ λ‘λ μ²λ¦¬ μ€ μμΈ λ°μ ===")
        log_print(f"μμΈ νμ : {error_type}")
        log_print(f"μλ¬ λ©μμ§: {error_msg}")
        traceback.print_exc()
        log_print(f"{'='*60}\n")
        # Map oversized-request errors to a 413 response.
        if '413' in error_msg or 'Request Entity Too Large' in error_msg or error_type == 'RequestEntityTooLarge':
            return jsonify({'error': 'νμΌ ν¬κΈ°κ° λ무 ν½λλ€. μ΅λ 100MBκΉμ§ μ λ‘λ κ°λ₯ν©λλ€.', 'step': 'file_size'}), 413
        return jsonify({'error': f'νμΌ μ λ‘λ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {error_type}: {error_msg}', 'step': 'exception'}), 500
def get_files():
    """Return all uploaded root files (parent_file_id is None), each with
    its chunk count, Parent-Chunk flag, and child ("continued") uploads,
    plus per-model statistics.  An optional ?model_name= query parameter
    filters by model.

    NOTE(review): this issues several queries per file (chunk count,
    parent-chunk lookup, child files) — an N+1 pattern; acceptable for
    small file counts but worth batching if the table grows.
    """
    try:
        model_name = request.args.get('model_name', None)
        # Only root files (parent_file_id is None); every user sees all files.
        query = UploadedFile.query.filter_by(parent_file_id=None)
        print(f"[νμΌ μ‘°ν] λͺ¨λ νμΌ μ‘°ν (μ¬μ©μ: {current_user.username})")
        # Count before the model filter, purely for diagnostics.
        total_before_filter = query.count()
        print(f"[νμΌ μ‘°ν] νν°λ§ μ νμΌ μ: {total_before_filter}κ°")
        if model_name:
            query = query.filter_by(model_name=model_name)
            print(f"[νμΌ μ‘°ν] λͺ¨λΈ '{model_name}' νν°λ§")
        files = query.order_by(UploadedFile.uploaded_at.desc()).all()
        # Log how many files survived the filter.
        print(f"[νμΌ μ‘°ν] νν°λ§ ν νμΌ μ: {len(files)}κ°")
        if len(files) > 0:
            print(f"[νμΌ μ‘°ν] 첫 λ²μ§Έ νμΌ λͺ¨λΈλͺ : {files[0].model_name}")
        else:
            # Nothing matched: log the model names that actually exist.
            all_files = UploadedFile.query.filter_by(parent_file_id=None).all()
            unique_models = set(f.model_name for f in all_files if f.model_name)
            print(f"[νμΌ μ‘°ν] λ°μ΄ν°λ² μ΄μ€μ μ‘΄μ¬νλ λͺ¨λΈλͺ λͺ©λ‘: {list(unique_models)}")
        # Attach chunk info and continuation uploads to every root file.
        files_with_children = []
        for file in files:
            file_dict = file.to_dict()
            # Chunk count for the root file itself.
            chunk_count = DocumentChunk.query.filter_by(file_id=file.id).count()
            file_dict['chunk_count'] = chunk_count
            # Whether an AI-generated Parent Chunk exists for this file.
            has_parent_chunk = ParentChunk.query.filter_by(file_id=file.id).first() is not None
            file_dict['has_parent_chunk'] = has_parent_chunk
            # Files uploaded as continuations of this one, oldest first.
            child_files = UploadedFile.query.filter_by(parent_file_id=file.id).order_by(UploadedFile.uploaded_at.asc()).all()
            child_files_dict = []
            for child in child_files:
                child_dict = child.to_dict()
                child_chunk_count = DocumentChunk.query.filter_by(file_id=child.id).count()
                child_dict['chunk_count'] = child_chunk_count
                # Child files carry the same Parent-Chunk flag.
                child_has_parent_chunk = ParentChunk.query.filter_by(file_id=child.id).first() is not None
                child_dict['has_parent_chunk'] = child_has_parent_chunk
                child_files_dict.append(child_dict)
            file_dict['child_files'] = child_files_dict
            files_with_children.append(file_dict)
        # Per-model statistics (root files only).
        model_stats = {}
        if not model_name:
            # Unfiltered: aggregate counts/sizes across every model.
            all_files = UploadedFile.query.filter_by(parent_file_id=None).all()
            for file in all_files:
                model = file.model_name or 'λ―Έμ§μ '
                if model not in model_stats:
                    model_stats[model] = {'count': 0, 'total_size': 0}
                model_stats[model]['count'] += 1
                model_stats[model]['total_size'] += file.file_size
        else:
            # Filtered: stats for the single requested model.
            model_stats[model_name] = {
                'count': len(files),
                'total_size': sum(f.file_size for f in files)
            }
        print(f"[νμΌ μ‘°ν] μ‘°νλ μλ³Έ νμΌ μ: {len(files)}κ°")
        return jsonify({
            'files': files_with_children,
            'model_stats': model_stats,
            'filtered_model': model_name
        }), 200
    except Exception as e:
        return jsonify({'error': f'νμΌ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_file_chunks(file_id):
    """Return chunk statistics for one of the current user's files.

    Responds with the total chunk count, previews of the first three
    chunks, and a learning-status flag the UI uses to show whether the
    file is ready for RAG retrieval.
    """
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if file is None:
            return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404
        chunks = (DocumentChunk.query
                  .filter_by(file_id=file_id)
                  .order_by(DocumentChunk.chunk_index.asc())
                  .all())
        total_chunks = len(chunks)
        # Preview only the first three chunks, truncating long content.
        sample_chunks = [
            {
                'index': c.chunk_index,
                'content_preview': c.content[:100] + '...' if len(c.content) > 100 else c.content,
                'content_length': len(c.content)
            }
            for c in chunks[:3]
        ]
        ready = total_chunks > 0
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'model_name': file.model_name,
            'total_chunks': total_chunks,
            'sample_chunks': sample_chunks,
            'learning_status': 'ready' if ready else 'not_ready',
            'message': f'{total_chunks}κ° μ²ν¬κ° μ μ₯λμ΄ RAG κ²μμ μ¬μ© κ°λ₯ν©λλ€.' if ready else 'μ²ν¬κ° μμ±λμ§ μμ RAG κ²μμ΄ λΆκ°λ₯ν©λλ€.'
        }), 200
    except Exception as e:
        return jsonify({'error': f'μ²ν¬ μ 보 μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_all_file_chunks(file_id):
    """Return every chunk of a file with its full content (admin view).

    Admins may inspect any file; regular users only files they uploaded.
    Each chunk entry includes its parsed metadata, degraded to None when
    the stored JSON is absent or corrupt so one bad row cannot fail the
    whole listing.
    """
    try:
        # Admins can view any file; others only their own uploads.
        if current_user.is_admin:
            file = UploadedFile.query.get(file_id)
        else:
            file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if not file:
            return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404
        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index.asc()).all()
        chunks_data = []
        for chunk in chunks:
            chunk_dict = {
                'id': chunk.id,
                'chunk_index': chunk.chunk_index,
                'content': chunk.content,
                'content_length': len(chunk.content),
                'created_at': chunk.created_at.isoformat() if chunk.created_at else None
            }
            # BUGFIX: was a bare 'except:' which also swallowed
            # SystemExit/KeyboardInterrupt; catch only JSON-parse failures.
            chunk_dict['metadata'] = None
            if chunk.chunk_metadata:
                try:
                    chunk_dict['metadata'] = json.loads(chunk.chunk_metadata)
                except (json.JSONDecodeError, TypeError, ValueError):
                    chunk_dict['metadata'] = None
            chunks_data.append(chunk_dict)
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'model_name': file.model_name,
            'total_chunks': len(chunks_data),
            'chunks': chunks_data
        }), 200
    except Exception as e:
        return jsonify({'error': f'μ²ν¬ λͺ©λ‘ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_file_parent_chunk(file_id):
    """Return the Parent Chunk of one of the current user's files, if any."""
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if file is None:
            return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404
        parent_chunk = ParentChunk.query.filter_by(file_id=file_id).first()
        # Base payload shared by both outcomes.
        payload = {
            'file_id': file_id,
            'filename': file.original_filename
        }
        if parent_chunk is None:
            payload['has_parent_chunk'] = False
            payload['message'] = 'Parent Chunkκ° μμ±λμ§ μμμ΅λλ€.'
        else:
            payload['has_parent_chunk'] = True
            payload['parent_chunk'] = parent_chunk.to_dict()
            payload['message'] = 'Parent Chunkκ° μ‘΄μ¬ν©λλ€.'
        return jsonify(payload), 200
    except Exception as e:
        return jsonify({'error': f'Parent Chunk μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def create_file_parent_chunk(file_id):
    """Generate (or regenerate) the AI-analyzed Parent Chunk for a file.

    Validates ownership, model binding, and file type, reads the source
    text (UTF-8 with CP949 fallback), then delegates the analysis to
    create_parent_chunk_with_ai.
    """
    try:
        file = UploadedFile.query.filter_by(id=file_id, uploaded_by=current_user.id).first()
        if file is None:
            return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404
        # A bound AI model is required for the analysis step.
        if not file.model_name:
            return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€. Parent Chunkλ₯Ό μμ±ν μ μμ΅λλ€.'}), 400
        # Only plain-text sources are supported.
        if not file.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'Parent Chunkλ ν μ€νΈ νμΌ(.txt, .md)μλ§ μμ±ν μ μμ΅λλ€.'}), 400
        try:
            try:
                with open(file.file_path, 'r', encoding='utf-8') as fh:
                    content = fh.read()
            except UnicodeDecodeError:
                # Retry with the legacy Korean codepage.
                with open(file.file_path, 'r', encoding='cp949') as fh:
                    content = fh.read()
        except Exception as e:
            return jsonify({'error': f'νμΌμ μ½μ μ μμ΅λλ€: {str(e)}'}), 500
        if not content or not content.strip():
            return jsonify({'error': 'νμΌ λ΄μ©μ΄ λΉμ΄μμ΅λλ€.'}), 400
        print(f"[Parent Chunk μλ μμ±] νμΌ ID {file_id}μ λν Parent Chunk μμ± μμ")
        print(f"[Parent Chunk μλ μμ±] λͺ¨λΈλͺ : {file.model_name}")
        print(f"[Parent Chunk μλ μμ±] νμΌλͺ : {file.original_filename}")
        parent_chunk = create_parent_chunk_with_ai(file_id, content, file.model_name)
        if parent_chunk is None:
            return jsonify({
                'error': 'Parent Chunk μμ±μ μ€ν¨νμ΅λλ€. μλ² λ‘κ·Έλ₯Ό νμΈνμΈμ.',
                'file_id': file_id,
                'filename': file.original_filename
            }), 500
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'has_parent_chunk': True,
            'parent_chunk': parent_chunk.to_dict(),
            'message': 'Parent Chunkκ° μ±κ³΅μ μΌλ‘ μμ±λμμ΅λλ€.'
        }), 200
    except Exception as e:
        return jsonify({'error': f'Parent Chunk μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def create_file_metadata(file_id):
    """(Re)generate metadata for every chunk of a file.

    Reads the original source text, calls extract_chunk_metadata per
    chunk, and merges the result into any existing metadata: new values
    win, lists are merged with duplicates removed, and fields the new
    extraction does not produce (e.g. the 'chapter' episode info added
    at upload time) are preserved.  All chunks are committed in one
    transaction; per-chunk failures are counted and skipped.
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        # Only the uploader or an admin may regenerate metadata.
        if not current_user.is_admin and file.uploaded_by != current_user.id:
            return jsonify({'error': 'κΆνμ΄ μμ΅λλ€.'}), 403
        # Extraction needs a bound AI model.
        if not file.model_name:
            return jsonify({'error': 'νμΌμ μ°κ²°λ AI λͺ¨λΈμ΄ μμ΅λλ€. λ©νλ°μ΄ν°λ₯Ό μμ±ν μ μμ΅λλ€.'}), 400
        # Only plain-text sources are supported.
        if not file.original_filename.lower().endswith(('.txt', '.md')):
            return jsonify({'error': 'λ©νλ°μ΄ν°λ ν μ€νΈ νμΌ(.txt, .md)μλ§ μμ±ν μ μμ΅λλ€.'}), 400
        # Read the full source text: UTF-8 first, CP949 fallback.
        try:
            with open(file.file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(file.file_path, 'r', encoding='cp949') as f:
                content = f.read()
        chunks = DocumentChunk.query.filter_by(file_id=file_id).order_by(DocumentChunk.chunk_index).all()
        if not chunks:
            return jsonify({'error': 'μ²ν¬κ° μμ΅λλ€. λ¨Όμ νμΌμ μ λ‘λνμΈμ.'}), 400
        print(f"[λ©νλ°μ΄ν° μμ±] νμΌ ID {file_id}μ λν λ©νλ°μ΄ν° μμ± μμ")
        print(f"[λ©νλ°μ΄ν° μμ±] λͺ¨λΈλͺ : {file.model_name}")
        print(f"[λ©νλ°μ΄ν° μμ±] νμΌλͺ : {file.original_filename}")
        print(f"[λ©νλ°μ΄ν° μμ±] μ²ν¬ κ°μ: {len(chunks)}κ°")
        success_count = 0
        fail_count = 0
        for chunk in chunks:
            try:
                # Load existing metadata; BUGFIX: was a bare 'except:' which
                # also swallowed SystemExit/KeyboardInterrupt.
                existing_metadata = {}
                if chunk.chunk_metadata:
                    try:
                        existing_metadata = json.loads(chunk.chunk_metadata)
                    except (json.JSONDecodeError, TypeError, ValueError):
                        existing_metadata = {}
                # Extract fresh metadata against the full source text.
                new_metadata = extract_chunk_metadata(
                    chunk_content=chunk.content,
                    full_content=content,  # reference the whole novel text
                    chunk_index=chunk.chunk_index,
                    file_id=file_id,
                    model_name=file.model_name
                )
                # Merge: new values override, lists are unioned in order,
                # untouched existing fields (e.g. 'chapter') are preserved.
                merged_metadata = existing_metadata.copy()
                for key, value in new_metadata.items():
                    if value is not None and value != []:
                        if isinstance(value, list) and isinstance(merged_metadata.get(key), list):
                            merged_list = merged_metadata.get(key, []).copy()
                            for item in value:
                                if item not in merged_list:
                                    merged_list.append(item)
                            merged_metadata[key] = merged_list
                        else:
                            merged_metadata[key] = value
                # Store merged metadata as JSON (None when empty).
                metadata_json = json.dumps(merged_metadata, ensure_ascii=False) if merged_metadata else None
                chunk.chunk_metadata = metadata_json
                success_count += 1
                # Progress log every 10 processed chunks.
                if (success_count + fail_count) % 10 == 0:
                    print(f"[λ©νλ°μ΄ν° μμ±] μ§ν μ€: {success_count + fail_count}/{len(chunks)}κ° μ²ν¬ μ²λ¦¬ μ€...")
            except Exception as e:
                print(f"[λ©νλ°μ΄ν° μμ±] κ²½κ³ : μ²ν¬ {chunk.chunk_index} λ©νλ°μ΄ν° μμ± μ€ν¨: {str(e)}")
                fail_count += 1
                continue
        # One commit for the whole batch.
        db.session.commit()
        print(f"[λ©νλ°μ΄ν° μμ±] μλ£: {success_count}κ° μ±κ³΅, {fail_count}κ° μ€ν¨")
        return jsonify({
            'file_id': file_id,
            'filename': file.original_filename,
            'total_chunks': len(chunks),
            'success_count': success_count,
            'fail_count': fail_count,
            'message': f'λ©νλ°μ΄ν° μμ±μ΄ μλ£λμμ΅λλ€. (μ±κ³΅: {success_count}κ°, μ€ν¨: {fail_count}κ°)'
        }), 200
    except Exception as e:
        db.session.rollback()
        print(f"[λ©νλ°μ΄ν° μμ±] μ€λ₯: {str(e)}")
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'λ©νλ°μ΄ν° μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def delete_file(file_id):
    """Delete an uploaded file together with every related file and artifact.

    Deleting a root file (parent_file_id is None) also removes all of its
    "continued" child uploads; deleting a child removes the root and all
    siblings.  For each record the on-disk file, its DocumentChunks, its
    vector-DB entries (best effort), and its ParentChunk are removed
    before the DB row itself.  Per-file failures are logged and skipped;
    a single commit covers the whole batch.
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        # Decide the complete set of records to delete based on whether the
        # requested file is a root upload or a continuation.
        files_to_delete = []
        if file.parent_file_id is None:
            # Root file: also delete every continuation upload.
            child_files = UploadedFile.query.filter_by(parent_file_id=file_id).all()
            files_to_delete = [file] + child_files
            print(f"[νμΌ μμ ] μλ³Έ νμΌ μμ : {file.original_filename}, μ°κ΄ νμΌ {len(child_files)}κ°λ ν¨κ» μμ ")
        else:
            # Continuation file: delete the root and all of its children.
            parent_file = UploadedFile.query.get(file.parent_file_id)
            if parent_file:
                # Root plus every sibling continuation.
                all_child_files = UploadedFile.query.filter_by(parent_file_id=file.parent_file_id).all()
                files_to_delete = [parent_file] + all_child_files
                print(f"[νμΌ μμ ] μ΄μ΄μ μ λ‘λλ νμΌ μμ : {file.original_filename}, μλ³Έ λ° μ°κ΄ νμΌ {len(all_child_files)}κ°λ ν¨κ» μμ ")
            else:
                # Orphaned continuation: delete just this record.
                files_to_delete = [file]
        deleted_count = 0
        deleted_files = []
        for file_to_delete in files_to_delete:
            try:
                # Remove the file from disk first (if still present).
                if os.path.exists(file_to_delete.file_path):
                    os.remove(file_to_delete.file_path)
                    print(f"[νμΌ μμ ] νμΌ μμ€ν μμ μμ : {file_to_delete.file_path}")
                # Bulk-delete the file's Child Chunks (DocumentChunk rows).
                child_chunk_count = DocumentChunk.query.filter_by(file_id=file_to_delete.id).count()
                if child_chunk_count > 0:
                    DocumentChunk.query.filter_by(file_id=file_to_delete.id).delete()
                    print(f"[νμΌ μμ ] Child Chunk {child_chunk_count}κ° μμ μλ£")
                # Best-effort removal of the file's vectors; failures are ignored.
                try:
                    vector_db = get_vector_db()
                    vector_db.delete_chunks_by_file_id(file_to_delete.id)
                    print(f"[νμΌ μμ ] λ²‘ν° DBμμ μ²ν¬ μμ μλ£")
                except Exception as vector_e:
                    print(f"[νμΌ μμ ] λ²‘ν° DB μμ μ€λ₯ (무μ): {str(vector_e)}")
                # Remove the AI-generated Parent Chunk, if one exists.
                parent_chunk = ParentChunk.query.filter_by(file_id=file_to_delete.id).first()
                if parent_chunk:
                    db.session.delete(parent_chunk)
                    print(f"[νμΌ μμ ] Parent Chunk μμ μλ£")
                deleted_files.append(file_to_delete.original_filename)
                db.session.delete(file_to_delete)
                deleted_count += 1
                print(f"[νμΌ μμ ] λ°μ΄ν°λ² μ΄μ€μμ νμΌ μμ μλ£: {file_to_delete.original_filename}")
            except Exception as e:
                # One failed record does not abort the rest of the batch.
                print(f"[νμΌ μμ μ€λ₯] {file_to_delete.original_filename}: {str(e)}")
                import traceback
                traceback.print_exc()
        db.session.commit()
        message = f'νμΌμ΄ μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€.'
        if deleted_count > 1:
            message = f'νμΌ {deleted_count}κ°κ° μ±κ³΅μ μΌλ‘ μμ λμμ΅λλ€. (μλ³Έ λ° μ°κ΄ νμΌ ν¬ν¨)'
        return jsonify({
            'message': message,
            'deleted_count': deleted_count,
            'deleted_files': deleted_files
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'νμΌ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_file_content(file_id):
    """Return the raw text content of an uploaded file.

    Reads as UTF-8 and falls back to CP949 when decoding fails.
    """
    try:
        file = UploadedFile.query.get_or_404(file_id)
        if not os.path.exists(file.file_path):
            return jsonify({'error': 'νμΌμ μ°Ύμ μ μμ΅λλ€.'}), 404
        try:
            with open(file.file_path, 'r', encoding='utf-8') as fh:
                content = fh.read()
        except UnicodeDecodeError:
            # UTF-8 failed; retry with the legacy Korean encoding.
            with open(file.file_path, 'r', encoding='cp949') as fh:
                content = fh.read()
        return jsonify({
            'content': content,
            'filename': file.original_filename
        }), 200
    except Exception as e:
        return jsonify({'error': f'νμΌ λ΄μ© μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_chat_sessions():
    """List the current user's 20 most recently updated chat sessions."""
    try:
        recent = (ChatSession.query
                  .filter_by(user_id=current_user.id)
                  .order_by(ChatSession.updated_at.desc())
                  .limit(20)
                  .all())
        return jsonify({'sessions': [s.to_dict() for s in recent]}), 200
    except Exception as e:
        return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def create_chat_session():
    """Create a new chat session for the current user."""
    try:
        payload = request.json
        # Title defaults to the placeholder used for untitled sessions.
        new_session = ChatSession(
            user_id=current_user.id,
            title=payload.get('title', 'μ λν'),
            model_name=payload.get('model_name', None)
        )
        db.session.add(new_session)
        db.session.commit()
        return jsonify({
            'message': 'λν μΈμ μ΄ μμ±λμμ΅λλ€.',
            'session': new_session.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λν μΈμ μμ± μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def get_chat_session(session_id):
    """Return one of the current user's chat sessions with its messages."""
    try:
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        payload = session.to_dict()
        payload['messages'] = [m.to_dict() for m in session.messages]
        return jsonify({'session': payload}), 200
    except Exception as e:
        return jsonify({'error': f'λν μΈμ μ‘°ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def update_chat_session(session_id):
    """Update mutable fields (currently the title) of a chat session.

    The updated_at timestamp is refreshed even when no field changes.
    """
    try:
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        body = request.json
        if 'title' in body:
            session.title = body['title']
        session.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.',
            'session': session.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def delete_chat_session(session_id):
    """Delete one of the current user's chat sessions."""
    try:
        doomed = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        db.session.delete(doomed)
        db.session.commit()
        return jsonify({'message': 'λν μΈμ μ΄ μμ λμμ΅λλ€.'}), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λν μΈμ μμ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500
def add_chat_message(session_id):
    """Append a message to a chat session owned by the current user.

    The first user message also becomes the session title (truncated to
    30 characters) when the session is still untitled.
    """
    try:
        session = ChatSession.query.filter_by(
            id=session_id,
            user_id=current_user.id
        ).first_or_404()
        body = request.json
        role = body.get('role', 'user')
        content = body.get('content', '')
        if not content:
            return jsonify({'error': 'λ©μμ§ λ΄μ©μ΄ νμν©λλ€.'}), 400
        message = ChatMessage(
            session_id=session_id,
            role=role,
            content=content
        )
        db.session.add(message)
        # Adopt the first user message as the title if none is set yet.
        if role == 'user' and (not session.title or session.title == 'μ λν'):
            session.title = content[:30] + '...' if len(content) > 30 else content
        session.updated_at = datetime.utcnow()
        db.session.commit()
        return jsonify({
            'message': 'λ©μμ§κ° μΆκ°λμμ΅λλ€.',
            'chat_message': message.to_dict()
        }), 200
    except Exception as e:
        db.session.rollback()
        return jsonify({'error': f'λ©μμ§ μΆκ° μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}'}), 500