dohyune committed on
Commit
d08d599
ยท
verified ยท
1 Parent(s): adb068a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1076 -0
app.py ADDED
@@ -0,0 +1,1076 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PROBIN - RFx ๋ฌธ์„œ ๋ถ„์„ AI (ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ + Grok ์ ๊ฒ€ ๋ฒ„์ „)
3
+
4
+ """
5
+ import streamlit as st
6
+ import fitz # PyMuPDF
7
+ import chromadb
8
+ from sentence_transformers import SentenceTransformer, util
9
+ import requests
10
+ import os
11
+ import re
12
+ import shutil
13
+ from collections import Counter
14
+ import numpy as np
15
+ from typing import List, Dict, Tuple
16
+ import base64
17
+ from dotenv import load_dotenv
18
+ import json
19
+
20
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
21
+ load_dotenv()
22
+
23
+ # Configuration
24
+ GROK_API_KEY = os.getenv("GROK_API_KEY")
25
+ GROK_API_BASE = "https://api.x.ai/v1"
26
+ CHROMA_DIR = "./chroma_db"
27
+ EMBEDDING_MODEL = 'jhgan/ko-sroberta-multitask'
28
+
29
+ # ==================== ํ•˜์ด๋ผ์ดํŠธ ์„ค์ • ํด๋ž˜์Šค ====================
30
class HighlightConfig:
    """Highlight settings; the highlight colour is fixed to yellow."""

    def __init__(self):
        # RGB components on a 0-1 scale; (1, 1, 0) is yellow.
        red, green, blue = 1.0, 1.0, 0.0
        self.color = [red, green, blue]
36
+
37
+ # Page config
38
+ st.set_page_config(
39
+ page_title="PROBIN",
40
+ page_icon="๐Ÿ”ฎ",
41
+ layout="wide",
42
+ initial_sidebar_state="expanded"
43
+ )
44
+
45
+ # Custom CSS
46
+ st.markdown("""
47
+ <style>
48
+ [data-testid="stSidebar"] {
49
+ background: linear-gradient(180deg,
50
+ #667eea 0%,
51
+ #764ba2 100%);
52
+ box-shadow: 4px 0 30px rgba(0,0,0,0.2);
53
+ width: 290px !important;
54
+ }
55
+
56
+ [data-testid="stSidebar"] h1 {
57
+ color: white !important;
58
+ text-shadow: 2px 2px 15px rgba(0,0,0,0.4);
59
+ }
60
+
61
+ /* ํŒŒ์ผ ์—…๋กœ๋” ๋ฐฐ๊ฒฝ ํˆฌ๋ช…ํ•˜๊ฒŒ */
62
+ [data-testid="stSidebar"] [data-testid="stFileUploader"] {
63
+ background: rgba(255,255,255,0.15);
64
+ border-radius: 15px;
65
+ padding: 1.5rem;
66
+ border: 3px dashed rgba(255,255,255,0.4);
67
+ transition: all 0.3s ease;
68
+ backdrop-filter: blur(10px);
69
+ }
70
+
71
+ /* ํŒŒ์ผ ์—…๋กœ๋” ๋‚ด๋ถ€ ์„น์…˜๋„ ํˆฌ๋ช…ํ•˜๊ฒŒ */
72
+ [data-testid="stFileUploader"] > section {
73
+ background: transparent !important;
74
+ }
75
+
76
+ /* ํŒŒ์ผ ์—…๋กœ๋” ๋“œ๋ž˜๊ทธ ์˜์—ญ */
77
+ [data-testid="stFileUploader"] > section > div {
78
+ background: transparent !important;
79
+ }
80
+
81
+ /* ์—…๋กœ๋“œ๋œ ํŒŒ์ผ ํ‘œ์‹œ ์˜์—ญ */
82
+ [data-testid="stFileUploader"] [data-testid="stMarkdownContainer"] {
83
+ color: #fafafa;
84
+ }
85
+ /* ํ•˜์–€ ๋ฐ•์Šค(๋“œ๋กญ์กด) ์Šคํƒ€์ผ */
86
+ [data-testid="stSidebar"] [data-testid="stFileUploader"] > section,
87
+ [data-testid="stSidebar"] [data-testid="stFileUploader"] section > div {
88
+ background: transparent !important;
89
+ border: none !important;
90
+ }
91
+ /* ๋“œ๋กญ์กด ๋‚ด๋ถ€ ํ…์ŠคํŠธ ์ƒ‰์ƒ */
92
+ [data-testid="stSidebar"] [data-testid="stFileUploader"] [data-testid="stMarkdownContainer"] p {
93
+ color: rgba(255,255,255,0.9) !important;
94
+ }
95
+ /* "ํŒŒ์ผ ์ฐพ๊ธฐ" ๋ฒ„ํŠผ */
96
+ [data-testid="stSidebar"] [data-testid="stFileUploader"] button[kind="secondary"] {
97
+ background: rgba(255,255,255,0.2) !important;
98
+ color: white !important;
99
+ border: 1px solid rgba(255,255,255,0.3) !important;
100
+ }
101
+ /* ์‚ฌ์ด๋“œ๋ฐ” ๋ฒ„ํŠผ ์Šคํƒ€์ผ ์—…๋ฐ์ดํŠธ */
102
+ [data-testid="stSidebar"] .stButton button {
103
+ background: rgba(255,255,255,0.15) !important;
104
+ color: white !important;
105
+ border: 2px solid rgba(255,255,255,0.4) !important;
106
+ border-radius: 12px !important;
107
+ font-weight: 700 !important;
108
+ padding: 0.75rem 1.5rem !important;
109
+ backdrop-filter: blur(10px) !important;
110
+ transition: all 0.3s ease !important;
111
+ box-shadow: 0 4px 15px rgba(0,0,0,0.1) !important;
112
+ }
113
+ [data-testid="stSidebar"] .stButton button:hover {
114
+ background: rgba(255,255,255,0.25) !important;
115
+ border-color: rgba(255,255,255,0.6) !important;
116
+ transform: translateY(-2px) scale(1.02) !important;
117
+ box-shadow: 0 6px 20px rgba(0,0,0,0.2) !important;
118
+ }
119
+ [data-testid="stSidebar"] .stButton button:active {
120
+ transform: translateY(0px) scale(0.98) !important;
121
+ }
122
+ /* Primary ๋ฒ„ํŠผ (๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘) ํŠน๋ณ„ ์Šคํƒ€์ผ */
123
+ [data-testid="stSidebar"] .stButton button[kind="primary"] {
124
+ background: rgba(255,255,255,0.25) !important;
125
+ border: 2px solid rgba(255,255,255,0.5) !important;
126
+ font-size: 1.05rem !important;
127
+ }
128
+ [data-testid="stSidebar"] .stButton button[kind="primary"]:hover {
129
+ background: rgba(255,255,255,0.35) !important;
130
+ border-color: rgba(255,255,255,0.7) !important;
131
+ }
132
+ [data-testid="stSidebar"] [data-testid="stAlert"] {
133
+ background-color: rgba(255, 255, 255, 0.001) !important;
134
+ border-radius: 0.5rem !important;
135
+ }
136
+ [data-testid="stAlert"] p {
137
+ color: rgb(250, 250, 250); /* ํฐ์ƒ‰ */
138
+ }
139
+ /* ๋ฉ”์ธ ์ปจํ…์ธ  ์ „์ฒด ๋„ˆ๋น„ ์‚ฌ์šฉ */
140
+ .main .block-container {
141
+ max-width: 100%;
142
+ padding-left: 2rem;
143
+ padding-right: 2rem;
144
+ }
145
+
146
+ /* ํ—ค๋” ์Šคํƒ€์ผ - ๋ฐ•์Šค ์ œ๊ฑฐ, ํ…์ŠคํŠธ ๊ทธ๋ฆผ์ž๋งŒ */
147
+ .probin-header {
148
+ padding: 1.5rem 2rem;
149
+ margin-bottom: 2rem;
150
+ }
151
+ .probin-title {
152
+ font-size: 2.5rem;
153
+ font-weight: bold;
154
+ color: white;
155
+ margin: 0;
156
+ text-align: center;
157
+ text-shadow: 2px 2px 8px rgba(0, 0, 0, 0.4),
158
+ 0 0 20px rgba(102, 126, 234, 0.4);
159
+ }
160
+ .probin-subtitle {
161
+ font-size: 1rem;
162
+ color: rgba(255, 255, 255, 0.9);
163
+ text-align: center;
164
+ margin-top: 0.5rem;
165
+ text-shadow: 1px 1px 6px rgba(0, 0, 0, 0.4);
166
+ }
167
+
168
+ /* ํŒŒ์ผ ์—…๋กœ๋” ์ปค์Šคํ„ฐ๋งˆ์ด์ง• */
169
+ [data-testid="stFileUploader"] {
170
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
171
+ border: 3px dashed #667eea;
172
+ border-radius: 1rem;
173
+ padding: 3rem 2rem;
174
+ }
175
+
176
+ [data-testid="stFileUploader"] > div {
177
+ text-align: center;
178
+ }
179
+
180
+ [data-testid="stFileUploader"] label {
181
+ font-size: 1.2rem !important;
182
+ color: #2D3748 !important;
183
+ font-weight: 600 !important;
184
+ }
185
+
186
+ /* PDF ์ปจํ…Œ์ด๋„ˆ */
187
+ .pdf-container {
188
+ border: 2px solid #E2E8F0;
189
+ border-radius: 0.5rem;
190
+ padding: 0.5rem;
191
+ height: 705px;
192
+ overflow-y: auto;
193
+ background: white;
194
+ }
195
+
196
+ /* ์ฑ„ํŒ… ์ปจํ…Œ์ด๋„ˆ - ์Šคํฌ๋กค ์ถ”๊ฐ€ */
197
+ .chat-container {
198
+ border: 2px solid #E2E8F0;
199
+ border-radius: 0.5rem;
200
+ padding: 1rem;
201
+ height: 650px;
202
+ overflow-y: auto;
203
+ background: white;
204
+ margin-bottom: 0.5rem;
205
+ }
206
+
207
+ /* ์ฑ„ํŒ… ์ž…๋ ฅ์ฐฝ๊ณผ ์ปจํ…Œ์ด๋„ˆ ๊ฐ„๊ฒฉ ์ตœ์†Œํ™” */
208
+ [data-testid="stChatInput"] {
209
+ margin-top: 0 !important;
210
+ padding-top: 0 !important;
211
+ }
212
+
213
+ /* ์ฑ„ํŒ… ์Šคํƒ€์ผ */
214
+ .source-box {
215
+ background: #F1F5F9;
216
+ padding: 1rem;
217
+ border-radius: 0.5rem;
218
+ margin: 0.5rem 0;
219
+ border-left: 3px solid #667eea;
220
+ }
221
+
222
+ .source-title {
223
+ font-weight: bold;
224
+ color: #667eea;
225
+ margin-bottom: 0.5rem;
226
+ }
227
+
228
+ .page-indicator {
229
+ background: #667eea;
230
+ color: white;
231
+ padding: 0.3rem 0.8rem;
232
+ border-radius: 1rem;
233
+ font-size: 0.85rem;
234
+ display: inline-block;
235
+ margin: 0.2rem;
236
+ }
237
+
238
+ .highlight-indicator {
239
+ background: #FEF08A;
240
+ color: #854D0E;
241
+ padding: 0.5rem 1rem;
242
+ border-radius: 0.5rem;
243
+ margin: 0.5rem 0;
244
+ font-weight: bold;
245
+ border-left: 4px solid #EAB308;
246
+ }
247
+
248
+ /* ์‚ฌ์šฉ ์•ˆ๋‚ด ์Šคํƒ€์ผ */
249
+ .usage-guide {
250
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
251
+ padding: 2rem;
252
+ border-radius: 1rem;
253
+ margin-bottom: 2rem;
254
+ height: 100%;
255
+ }
256
+
257
+ .guide-step {
258
+ display: flex;
259
+ align-items: center;
260
+ margin: 1.5rem 0;
261
+ font-size: 1.1rem;
262
+ color: #2D3748;
263
+ }
264
+
265
+ .step-number {
266
+ background: #667eea;
267
+ color: white;
268
+ width: 2.5rem;
269
+ height: 2.5rem;
270
+ border-radius: 50%;
271
+ display: flex;
272
+ align-items: center;
273
+ justify-content: center;
274
+ font-weight: bold;
275
+ font-size: 1.2rem;
276
+ margin-right: 1rem;
277
+ flex-shrink: 0;
278
+ }
279
+
280
+ /* ๋ทฐ์–ด ํ—ค๋” ์Šคํƒ€์ผ */
281
+ .viewer-header {
282
+ display: flex;
283
+ justify-content: space-between;
284
+ align-items: center;
285
+ margin-bottom: 1rem;
286
+ }
287
+ </style>
288
+ """, unsafe_allow_html=True)
289
+
290
+
291
def init_session():
    """Seed ``st.session_state`` with a default for every key the app reads.

    Keys that are already present are left untouched, so calling this on
    every rerun only fills in what is missing.
    """
    # (key, factory) pairs; factories are called lazily so that mutable
    # defaults and HighlightConfig are only built when the key is absent.
    default_factories = (
        ('processed', lambda: False),
        ('vector_db', lambda: None),
        ('embedder', lambda: None),
        ('chat_history', list),
        ('doc_metadata', dict),
        ('pdf_bytes', lambda: None),
        ('pdf_pages_text', dict),
        ('current_highlights', list),
        ('zoom_level', lambda: 2.0),
        ('highlight_config', HighlightConfig),
        ('processing_query', lambda: None),
    )

    for key, make_default in default_factories:
        if key not in st.session_state:
            st.session_state[key] = make_default()
315
+
316
+
317
def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]:
    """Read an uploaded PDF and split each page's text into overlapping chunks.

    Args:
        pdf_file: File-like object exposing ``read()`` and a ``name``
            attribute; ``name`` is stored as the ``source`` of every chunk.

    Returns:
        A tuple ``(chunks, metadata_list, pdf_bytes, pages_text)``:
        ``chunks`` is the list of chunk strings, ``metadata_list`` holds one
        ``{"page", "source", "chunk_type"}`` dict per chunk (same order),
        ``pdf_bytes`` is the raw file content and ``pages_text`` maps the
        1-based page number to that page's unprocessed text.
    """
    # Chunk sizes are measured in characters.
    # NOTE(review): earlier notes in this function described these values as
    # raised to 800 / 150, but the code has always used 300 / 60 here. The
    # values are kept as they are; confirm which pair is intended.
    CHUNK_SIZE = 300
    OVERLAP_SIZE = 60

    pdf_bytes = pdf_file.read()
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    chunks = []
    metadata_list = []
    pages_text = {}

    def add_chunk(chunk_text, page_number):
        """Record one finished chunk together with its metadata."""
        chunks.append(chunk_text.strip())
        metadata_list.append({
            "page": page_number,
            "source": pdf_file.name,
            "chunk_type": "paragraph"
        })

    # try/finally so the document handle is released even if a page fails.
    try:
        for page_index in range(len(doc)):
            page_number = page_index + 1
            text = doc[page_index].get_text("text")
            pages_text[page_number] = text

            if not text.strip():
                continue

            lines = [line.strip() for line in text.split('\n') if line.strip()]
            cleaned_text = '\n'.join(lines)

            # The capturing group keeps the sentence terminators as their own
            # pieces, so concatenating pieces reproduces the cleaned text.
            sentences = re.split(r'([.!?]\s+|\n{2,})', cleaned_text)
            sentences = [s for s in sentences if s.strip()]

            current_chunk = ""
            current_length = 0

            for sentence in sentences:
                sentence_length = len(sentence)

                if current_length + sentence_length > CHUNK_SIZE and current_chunk:
                    add_chunk(current_chunk, page_number)

                    # Carry the tail of the finished chunk into the next one
                    # so neighbouring chunks overlap.
                    if len(current_chunk) > OVERLAP_SIZE:
                        overlap_text = current_chunk[-OVERLAP_SIZE:]
                    else:
                        overlap_text = current_chunk
                    current_chunk = overlap_text + sentence
                    current_length = len(current_chunk)
                else:
                    current_chunk += sentence
                    current_length += sentence_length

            # Flush whatever is left for this page; chunks never span pages.
            if current_chunk.strip():
                add_chunk(current_chunk, page_number)
    finally:
        doc.close()

    return chunks, metadata_list, pdf_bytes, pages_text
382
+
383
+
384
@st.cache_resource
def load_embedding_model():
    """Build the SentenceTransformer named by ``EMBEDDING_MODEL``.

    Wrapped in ``st.cache_resource`` (presumably so the model is created
    once and reused across reruns — confirm against the Streamlit docs).
    """
    model = SentenceTransformer(EMBEDDING_MODEL)
    return model
388
+
389
+
390
def create_vector_db(chunks: List[str], metadata_list: List[Dict]):
    """Embed the chunks and store them in a fresh in-memory Chroma collection.

    Args:
        chunks: Text chunks to index; must not be empty.
        metadata_list: One metadata dict per chunk, in the same order.

    Returns:
        ``(collection, embedder)``: the populated ``rfx_docs`` collection
        (cosine space) and the embedding model used to fill it.

    Raises:
        ValueError: If ``chunks`` is empty or the two lists differ in length.
    """
    # Validate before loading the model or touching Chroma so the caller
    # gets a clear message (e.g. for a scanned PDF with no text layer).
    if not chunks:
        raise ValueError(
            "No text chunks to index; the PDF may not contain extractable text."
        )
    if len(chunks) != len(metadata_list):
        raise ValueError(
            f"chunks ({len(chunks)}) and metadata_list ({len(metadata_list)}) "
            "must have the same length."
        )

    embedder = load_embedding_model()

    # In-memory client: avoids file-system problems with a persisted store.
    client = chromadb.EphemeralClient(
        settings=chromadb.Settings(
            anonymized_telemetry=False,
            allow_reset=True
        )
    )

    # Deliberate best effort: drop a collection left over from a previous
    # upload; a missing collection is the normal case and not an error.
    try:
        client.delete_collection("rfx_docs")
    except Exception:
        pass

    collection = client.create_collection(
        name="rfx_docs",
        metadata={"hnsw:space": "cosine"}
    )

    # encode() batches internally, so no manual batching loop is needed.
    embeddings = embedder.encode(
        chunks,
        batch_size=32,
        show_progress_bar=False,
        convert_to_numpy=True
    )

    collection.add(
        embeddings=[vector.tolist() for vector in embeddings],
        documents=chunks,
        metadatas=metadata_list,
        ids=[f"doc_{i}" for i in range(len(chunks))]
    )

    return collection, embedder
431
+
432
+
433
def extract_keywords(text: str, top_n: int = 5) -> List[str]:
    """Extract up to ``top_n`` search keywords from ``text``.

    Tokens containing digits (optionally wrapped in Hangul, e.g. an amount
    with its unit) come first, in order of appearance. The remaining slots
    are filled with Hangul words of two or more syllables, ranked by
    frequency, with domain terms (budget, contract, bid, ...) boosted and
    common question/function words removed.

    Args:
        text: Text to analyse, typically the user's question.
        top_n: Maximum number of keywords to return.

    Returns:
        A list of distinct keywords, at most ``top_n`` long.
    """
    if top_n <= 0:
        return []

    # Hangul Syllables block (U+AC00..U+D7A3), written with escapes so the
    # pattern cannot be corrupted by a wrong file encoding.
    hangul = r'[\uAC00-\uD7A3]'
    words_with_numbers = re.findall(hangul + r'*\d+' + hangul + r'*', text)
    words = re.findall(hangul + r'{2,}', text)

    stopwords = {
        '것', '등', '및', '그', '이', '저', '수', '때', '중', '내', '년', '월', '일',
        '경우', '대한', '통해', '위해', '관련', '있는', '하는', '되는', '이런', '저런',
        '어떤', '무슨', '어느', '누구', '언제', '어디', '무엇', '어떻게', '왜',
        '알려', '설명', '말해', '대해', '관하여', '있나요', '인가요', '무엇인가요',
        '얼마', '입니까', '합니까'
    }

    important_keywords = {
        '금액', '가격', '비용', '예산', '설계', '사업', '과업', '계약',
        '공사', '용역', '제안', '입찰', '낙찰', '견적', '단가'
    }

    word_freq = Counter(w for w in words if w not in stopwords)

    # Boost domain terms so they outrank ordinary words.
    for word in word_freq:
        if word in important_keywords:
            word_freq[word] += 5

    # dict.fromkeys de-duplicates while keeping first-seen order, so a
    # repeated number does not occupy several keyword slots.
    result = list(dict.fromkeys(words_with_numbers))

    for word, _ in word_freq.most_common(top_n * 2):
        if len(result) >= top_n:
            break
        if word not in result:
            result.append(word)

    return result[:top_n]
468
+
469
+
470
+ # ==================== New hybrid search function ====================
471
def hybrid_search(query: str, collection, embedder, top_k: int = 3) -> Dict:
    """Rank chunks by a blend of vector similarity and keyword overlap.

    A pool of nearest neighbours is fetched from ``collection`` and each
    candidate is re-scored as ``0.7 * vector_score + 0.3 * keyword_score``.
    ``vector_score`` is ``1 - distance`` and ``keyword_score`` is the
    fraction of query keywords found in the chunk (case-insensitive).

    Args:
        query: The user's question.
        collection: Chroma collection holding the document chunks.
        embedder: Model whose ``encode`` produces the query embedding.
        top_k: Number of results to return.

    Returns:
        A dict with ``documents`` and ``metadatas`` (each a list wrapping
        one inner list, mirroring Chroma's query shape), ``scores`` (hybrid
        scores, best first) and ``keywords`` (the extracted query keywords).
    """
    keywords = extract_keywords(query, top_n=5)

    # Fetch more candidates than needed so keyword re-ranking has room to
    # work, but never fewer than top_k and never more than what is stored.
    available = collection.count()
    candidate_count = min(max(20, top_k), available)
    if top_k <= 0 or candidate_count <= 0:
        return {
            'documents': [[]],
            'metadatas': [[]],
            'scores': [],
            'keywords': keywords
        }

    # 1. Vector search
    query_embedding = embedder.encode([query], convert_to_numpy=True)[0]
    vector_results = collection.query(
        query_embeddings=[query_embedding.tolist()],
        n_results=candidate_count,
        include=["documents", "metadatas", "distances"]
    )

    # 2. Hybrid scoring
    lowered_keywords = [keyword.lower() for keyword in keywords]
    candidates = zip(
        vector_results['ids'][0],
        vector_results['documents'][0],
        vector_results['metadatas'][0],
        vector_results['distances'][0],
    )

    hybrid_results = []
    for doc_id, doc, metadata, distance in candidates:
        vector_score = 1 - distance  # convert distance to similarity

        doc_lower = doc.lower()
        if lowered_keywords:
            matches = sum(1 for keyword in lowered_keywords if keyword in doc_lower)
            keyword_score = matches / len(lowered_keywords)
        else:
            keyword_score = 0

        hybrid_results.append({
            'id': doc_id,
            'document': doc,
            'metadata': metadata,
            'hybrid_score': 0.7 * vector_score + 0.3 * keyword_score,
            'vector_score': vector_score,
            'keyword_score': keyword_score
        })

    # 3. Best hybrid score first, keep the top_k
    hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)
    top_results = hybrid_results[:top_k]

    return {
        'documents': [[r['document'] for r in top_results]],
        'metadatas': [[r['metadata'] for r in top_results]],
        'scores': [r['hybrid_score'] for r in top_results],
        'keywords': keywords
    }
521
+
522
+
523
+ # ==================== Grok API verification function ====================
524
def grok_verify_and_extract(query: str, search_results: Dict, api_key: str) -> Dict:
    """Ask the Grok chat API to pick the single most relevant passage.

    Args:
        query: The user's question.
        search_results: Dict with ``documents`` and ``metadatas`` entries,
            each a list wrapping one inner list (every metadata item needs
            a ``page`` key).
        api_key: Bearer token for the Grok API.

    Returns:
        The JSON object parsed from the model's reply (the prompt asks for
        ``selected_text``, ``page`` and ``relevance_reason``), or
        ``{"error": message}`` when there is nothing to verify, the request
        fails, or the reply is not a JSON object.
    """
    docs = search_results['documents'][0]
    metas = search_results['metadatas'][0]

    # Nothing to choose from: skip the network round-trip entirely.
    if not docs:
        return {"error": "no search results to verify"}

    # Number each candidate passage and tag it with its page.
    formatted_docs = [
        f"[๋ฌธ์„œ {i}] (ํŽ˜์ด์ง€ {meta['page']})\n{doc}"
        for i, (doc, meta) in enumerate(zip(docs, metas), 1)
    ]
    context = "\n\n".join(formatted_docs)

    # NOTE(review): the non-ASCII prompt text below looks mis-encoded in this
    # copy of the file; it is reproduced unchanged. Restore it from a correctly
    # decoded (UTF-8) copy of the source before relying on it.
    system_prompt = """๋‹น์‹ ์€ RFx ๋ฌธ์„œ ๋ถ„์„ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.
์ฃผ์–ด์ง„ 3๊ฐœ์˜ ๋ฌธ์„œ ์ค‘์—์„œ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ๊ณผ **๊ฐ€์žฅ ๊ด€๋ จ ์žˆ๋Š” ๋‹จ 1๊ฐœ์˜ ํ•ต์‹ฌ ์ •๋ณด**๋งŒ ์„ ํƒํ•˜์„ธ์š”.

**์ค‘์š” ๊ทœ์น™:**
1. ๋ฐ˜๋“œ์‹œ **1๊ฐœ์˜ ํ…์ŠคํŠธ**๋งŒ ์ถ”์ถœ
2. ๊ฐ€์žฅ ์ง์ ‘์ ์œผ๋กœ ์งˆ๋ฌธ์— ๋‹ตํ•˜๋Š” ์ •๋ณด ์„ ํƒ
3. ๊ธˆ์•ก, ๋‚ ์งœ, ์ˆ˜๋Ÿ‰ ๋“ฑ ๊ตฌ์ฒด์ ์ธ ์ˆซ์ž ์ •๋ณด ์šฐ์„ 
4. ์ถ”์ถœ๋œ ํ…์ŠคํŠธ๋Š” ์›๋ฌธ ๊ทธ๋Œ€๋กœ ์œ ์ง€ (150์ž ์ด๋‚ด)
5. JSON ํ˜•์‹์œผ๋กœ๋งŒ ์‘๋‹ต

**์‘๋‹ต ํ˜•์‹:**
{
"selected_text": "์„ ํƒ๋œ ํ…์ŠคํŠธ (์›๋ฌธ ๊ทธ๋Œ€๋กœ)",
"page": ํŽ˜์ด์ง€๋ฒˆํ˜ธ,
"relevance_reason": "์ด ํ…์ŠคํŠธ๋ฅผ ์„ ํƒํ•œ ์ด์œ "
}"""

    user_prompt = f"""<์งˆ๋ฌธ>
{query}
</์งˆ๋ฌธ>

<๊ฒ€์ƒ‰๋œ ๋ฌธ์„œ๋“ค>
{context}
</๊ฒ€์ƒ‰๋œ ๋ฌธ์„œ๋“ค>

์œ„ 3๊ฐœ ๋ฌธ์„œ์—์„œ ์งˆ๋ฌธ์— ๊ฐ€์žฅ ์ •ํ™•ํ•˜๊ฒŒ ๋‹ตํ•˜๋Š” **๋‹จ 1๊ฐœ์˜ ํ•ต์‹ฌ ์ •๋ณด**๋ฅผ JSON ํ˜•์‹์œผ๋กœ ์„ ํƒํ•˜์„ธ์š”.
์„ ํƒํ•œ ํ…์ŠคํŠธ๋Š” 150์ž ์ด๋‚ด๋กœ ํ•˜์„ธ์š”."""

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "grok-3",
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        "temperature": 0.1,
        "max_tokens": 1000,
        "stream": False
    }

    try:
        response = requests.post(
            f"{GROK_API_BASE}/chat/completions",
            headers=headers,
            json=payload,
            timeout=30
        )

        if response.status_code != 200:
            return {"error": f"API ์˜ค๋ฅ˜: {response.status_code}"}

        result = response.json()
        content = result["choices"][0]["message"]["content"]

        # Strip markdown code fences, then keep only the outermost {...} so
        # a reply that wraps the JSON in extra prose still parses.
        content = content.replace("```json", "").replace("```", "").strip()
        start, end = content.find("{"), content.rfind("}")
        if start != -1 and end > start:
            content = content[start:end + 1]

        extracted_data = json.loads(content)
        if not isinstance(extracted_data, dict):
            # Callers use dict methods on the result; reject arrays/scalars.
            return {"error": "unexpected response format (not a JSON object)"}

        return extracted_data

    except Exception as e:
        # Deliberate catch-all: verification is optional, so any failure is
        # reported to the caller as an error dict instead of raising.
        return {"error": f"์˜ค๋ฅ˜: {str(e)}"}
603
+
604
+
605
def build_context(search_results: Dict, max_length: int = 3000) -> str:
    """Concatenate retrieved passages into one prompt context string.

    Each passage is preceded by a numbered header that carries its page.
    Passages are added in order until the next one would push the total
    past ``max_length`` characters; that passage is added in truncated form
    (ending in ``...``) only if more than 200 characters of budget remain,
    and nothing after it is included.

    Args:
        search_results: Dict with ``documents`` and ``metadatas`` entries,
            each a list wrapping one inner list.
        max_length: Character budget for the passages.

    Returns:
        The selected passages joined with newlines ("" when none fit).
    """
    passages = search_results['documents'][0]
    infos = search_results['metadatas'][0]

    pieces = []
    used = 0

    for index, (passage, info) in enumerate(zip(passages, infos), start=1):
        header = f"[๋ฌธ์„œ {index}] (ํŽ˜์ด์ง€ {info['page']})\n"
        entry = f"{header}{passage}\n"

        if used + len(entry) <= max_length:
            pieces.append(entry)
            used += len(entry)
            continue

        # Over budget: squeeze in a shortened copy if there is still room
        # worth using, then stop.
        budget = max_length - used
        if budget > 200:
            pieces.append(f"{header}{passage[:budget - 50]}...\n")
        break

    return "\n".join(pieces)
628
+
629
+
630
def generate_answer(query: str, search_results: Dict, api_key: str) -> str:
    """Generate a document-grounded answer through the Grok chat API.

    The retrieved passages are packed into a context (4000-character
    budget) and sent with the question to the ``grok-3`` model.

    Args:
        query: The user's question.
        search_results: Retrieval output passed on to ``build_context``.
        api_key: Bearer token for the Grok API.

    Returns:
        The model's answer text. On a non-200 response or any exception an
        error message string is returned instead of raising.
    """
    context = build_context(search_results, max_length=4000)

    system_prompt = """๋‹น์‹ ์€ ์ž๋™์ฐจ ์ œ์กฐ์—… RFx ๋ฌธ์„œ ์ „๋ฌธ ๋ถ„์„๊ฐ€์ž…๋‹ˆ๋‹ค.
**์‚ฐ์—… ํŠนํ™” ์ง€์นจ:**
1. **์ž๋™์ฐจ ์ œ์กฐ์—… ์€์–ดยท์•ฝ์–ด ํ•ด์„**: ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์—๋Š” ์ž๋™์ฐจ ์ œ์กฐ์—… ํŠน์œ ์˜ ์€์–ดยท์•ฝ์–ดยท์ „๋ฌธ์šฉ์–ด๊ฐ€ ํฌํ•จ๋  ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์‚ฐ์—… ๋ฌธ๋งฅ์— ๋งž๊ฒŒ ์ •ํ™•ํžˆ ํ•ด์„ํ•˜๋ผ.
2. **์–ธ์–ด ํ˜ผ์šฉ ๋ฐ ๋น„๋ฌธ ๋Œ€์‘**: ์‚ฌ์šฉ์ž์˜ ๋ฌธ์žฅ์€ ํ•œ๊ตญ์–ด์™€ ์˜์–ด๊ฐ€ ์„ž์ด๊ฑฐ๋‚˜ ๋ฌธ๋ฒ• ์˜ค๋ฅ˜๊ฐ€ ์žˆ์„ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ •ํ™•ํžˆ ์ดํ•ดํ•˜๋ผ.
3. **๋ชจํ˜ธํ•œ ์งˆ๋ฌธ ์ž๋™ ๋ณด์ •**: ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์ด ๋ถˆ์™„์ „ํ•˜๊ฑฐ๋‚˜ ๋ชจํ˜ธํ•ด๋„ ์งˆ๋ฌธ ์˜๋„๋ฅผ ์ถ”๋ก ํ•˜์—ฌ ์ ์ ˆํ•˜๊ฒŒ ์žฌ๊ตฌ์„ฑํ•˜๋ผ.
**๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์›์น™ (์ ˆ๋Œ€ ์ถ”์ธก ๊ธˆ์ง€):**
1. ์ œ๊ณต๋œ ๋ฌธ์„œ๋ฅผ **๋งค์šฐ ๊ผผ๊ผผํžˆ** ์ฝ๊ณ  ์ •ํ™•ํ•œ ์ •๋ณด๋ฅผ ์ฐพ์œผ์„ธ์š”
2. **๋ฐ˜๋“œ์‹œ ๋ฌธ์„œ์—์„œ ๊ทผ๊ฑฐ๋ฅผ ์ฐพ์•„ ๋‹ต๋ณ€**ํ•˜๊ณ , ๋ฌธ์„œ์— ์—†๋Š” ๋‚ด์šฉ์€ ์ž„์˜๋กœ ์ถ”์ธกํ•˜์ง€ ๋ง๊ณ  **"๋ฌธ์„œ์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"**๋ผ๊ณ  ๋ช…์‹œํ•˜๋ผ
3. **๋ฌธ์„œ์™€ ์ „ํ˜€ ๋ฌด๊ด€ํ•œ ์งˆ๋ฌธ**(์˜ˆ: ์ ์‹ฌ ์ถ”์ฒœ, ๋‚ ์”จ, ์ผ์ƒ ๋Œ€ํ™” ๋“ฑ)์€ **"์ฃ„์†กํ•˜์ง€๋งŒ, ์ œ๊ณต๋œ ๋ฌธ์„œ์—๋Š” ํ•ด๋‹น ์งˆ๋ฌธ๊ณผ ๊ด€๋ จ๋œ ์ •๋ณด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."**๋ผ๊ณ ๋งŒ ๋‹ต๋ณ€ํ•˜๊ณ  ์ถ”๊ฐ€ ์„ค๋ช… ์—†์ด ์ข…๋ฃŒํ•˜๋ผ
4. ๋ฌธ์„œ์— ์ •๋ณด๊ฐ€ ์žˆ๋Š”๋ฐ๋„ "์—†๋‹ค"๊ณ  ํ•˜์ง€ ๋งˆ์„ธ์š”
**ํ•ต์‹ฌ ์ •๋ณด ์šฐ์„  ์ถ”์ถœ:**
- ๊ธˆ์•ก, ์ˆ˜๋Ÿ‰, ๊ทœ๊ฒฉ, ์ผ์ •, ์š”๊ตฌ์กฐ๊ฑด ๋“ฑ **์ˆ˜์น˜ ๊ธฐ๋ฐ˜ ์ •๋ณด๋ฅผ ์ตœ์šฐ์„ **์œผ๋กœ ์‹๋ณ„ํ•˜๊ณ  ์ •ํ™•ํ•˜๊ฒŒ ๋ฐ˜ํ™˜ํ•˜๋ผ
- ์ˆซ์ž, ๊ธˆ์•ก, ๋‚ ์งœ ๋“ฑ ๊ตฌ์ฒด์ ์ธ ์ •๋ณด๋ฅผ ์šฐ์„ ์ ์œผ๋กœ ์ฐพ์œผ์„ธ์š”
- ์• ๋งคํ•œ ํ‘œํ˜„ ๋Œ€์‹  ๊ตฌ์ฒด์ ์ธ ์ˆ˜์น˜๋ฅผ ์ œ๊ณตํ•˜์„ธ์š”
**๋ฐฉ๋Œ€ํ•œ ๋ฌธ์„œ ์ฒ˜๋ฆฌ (500ํŽ˜์ด์ง€ ๊ฐ€๋Šฅ):**
- ๋ฌธ์„œ๊ฐ€ ๋งค์šฐ ๊ธธ ์ˆ˜ ์žˆ์œผ๋ฏ€๋กœ ์งˆ๋ฌธ๊ณผ ์ง์ ‘ ๊ด€๋ จ๋œ ๋ถ€๋ถ„๋งŒ ์„ ๋ณ„ํ•ด ์š”์•ฝํ•˜๊ณ  ํ•ต์‹ฌ ์ •๋ณด๋งŒ ์‚ฌ์šฉํ•˜๋ผ
**์‹ค๋ฌด ๋งฅ๋ฝ ๊ณ ๋ ค (RFx ํ”„๋กœ์„ธ์Šค ํŠนํ™”):**
- ๋‹ต๋ณ€ํ•  ๋•Œ ์‹ค์ œ ์ž๋™์ฐจ RFx ์‹ค๋ฌด์ž๊ฐ€ ์˜์‚ฌ๊ฒฐ์ •์— ์‚ฌ์šฉํ•˜๋Š” ์ •๋ณด๋ผ๋Š” ์ ์„ ๊ณ ๋ คํ•˜์—ฌ ์‹ค๋ฌด ์ค‘์‹ฌ์œผ๋กœ ๋ช…ํ™•ํ•˜๊ฒŒ ์„ค๋ช…ํ•˜๋ผ
- ํŠนํžˆ ๋‹ค์Œ ํ•ญ๋ชฉ๋“ค์„ ์šฐ์„ ์ ์œผ๋กœ ํŒŒ์•…ํ•˜๋ผ:
1. ์‚ฌ์—… ์ฐธ์—ฌ ์ž๊ฒฉ ๋ฐ ์š”๊ตฌ ์ธ์ฆ
2. ์‚ฌ์—… ๊ธฐ๊ฐ„ ๋ฐ ์ผ์ •
3. ์˜ˆ์‚ฐ (ํ˜„๊ธˆ/ํ˜„๋ฌผ ๋น„์ค‘, ์ˆœ์ˆ˜ ํšŒ์‚ฌ ์ˆ˜์ต ๊ฐ€๋Šฅ์„ฑ)
4. ์ œ์•ˆ์š”์ฒญ์„œ ์‚ฌ์–‘์„œ โ€“ ํ•„์š”ํ•œ ๊ธฐ์ˆ ์  ์š”๊ตฌ์‚ฌํ•ญ(์„œ๋ฒ„/์†Œํ”„ํŠธ์›จ์–ด ๋“ฑ)
5. ํŒ๋งค ๋Œ€์ƒ ๋ฐ ์‚ฌ์—… ๋ฒ”์œ„
**๋‹ต๋ณ€ ํ˜•์‹:**
- ๋‹ต๋ณ€ ์‹œ ๋ฐ˜๋“œ์‹œ **[ํŽ˜์ด์ง€ X]** ํ˜•ํƒœ๋กœ ์ถœ์ฒ˜๋ฅผ ๋ช…์‹œํ•˜์„ธ์š” (์˜ˆ: [ํŽ˜์ด์ง€ 3], [ํŽ˜์ด์ง€ 5, 12])
- ๊ด€๋ จ ๋ฌธ๋งฅ์„ ์œ ์ง€ํ•˜๋ฉฐ, ๋‹ต๋ณ€์—๋Š” ๋ฌธ์„œ์˜ ํŽ˜์ด์ง€ ๋ฒˆํ˜ธ์™€ ์›๋ฌธ ์ผ๋ถ€๋ฅผ ์ •ํ™•ํžˆ ์ธ์šฉํ•˜๋ผ
- ํ•ต์‹ฌ ๋‹ต๋ณ€์„ ๋จผ์ € ๋ช…ํ™•ํ•˜๊ฒŒ ์ œ์‹œ
- ํ•„์š”์‹œ ์ถ”๊ฐ€ ๊ด€๋ จ ์ •๋ณด ์ œ๊ณต
- ๋ฆฌ์ŠคํŠธ๋Š” - ๋˜๋Š” ๋ฒˆํ˜ธ๋ฅผ ์‚ฌ์šฉ
- ๊ฐ•์กฐ๋Š” **๊ตต๊ฒŒ** ๋˜๋Š” *๊ธฐ์šธ์ž„* ์‚ฌ์šฉ
- **๋‹ต๋ณ€์€ ๋ฐ˜๋“œ์‹œ ๋งˆํฌ๋‹ค์šด๋งŒ ์‚ฌ์šฉํ•ด์•ผ ํ•˜๋ฉฐ, HTML ํƒœ๊ทธ(<div>, <span>, <details>, <summary> ๋“ฑ)๋Š” ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์‹ญ์‹œ์˜ค**"""

    user_prompt = f"""๋‹ค์Œ ๋ฌธ์„œ๋“ค์„ **๋งค์šฐ ๊ผผ๊ผผํžˆ** ์ฝ๊ณ  ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•˜์„ธ์š”.
<๋ฌธ์„œ>
{context}
</๋ฌธ์„œ>
<์งˆ๋ฌธ>
{query}
</์งˆ๋ฌธ>
**์ค‘์š”**:
- ์งˆ๋ฌธ์ด ๋ฌธ์„œ์™€ ์ „ํ˜€ ๋ฌด๊ด€ํ•œ ๊ฒฝ์šฐ(์˜ˆ: ์ ์‹ฌ ์ถ”์ฒœ, ๋‚ ์”จ, ์ผ์ƒ ๋Œ€ํ™” ๋“ฑ) "์ฃ„์†กํ•˜์ง€๋งŒ, ์ œ๊ณต๋œ ๋ฌธ์„œ์—๋Š” ํ•ด๋‹น ์งˆ๋ฌธ๊ณผ ๊ด€๋ จ๋œ ์ •๋ณด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์ง€ ์•Š์Šต๋‹ˆ๋‹ค."๋ผ๊ณ ๋งŒ ๋‹ต๋ณ€ํ•˜์„ธ์š”
- ๋ฌธ์„œ๋ฅผ ์ฒ˜์Œ๋ถ€ํ„ฐ ๋๊นŒ์ง€ ์ฃผ์˜ ๊นŠ๊ฒŒ ์ฝ์œผ์„ธ์š”
- ์ˆซ์ž, ๊ธˆ์•ก ๋“ฑ ๊ตฌ์ฒด์ ์ธ ์ •๋ณด๋ฅผ ์ฐพ์œผ์„ธ์š”
- ์ฐพ์€ ์ •๋ณด๋Š” ์ •ํ™•ํžˆ ์ธ์šฉํ•˜์„ธ์š”
- ์ถœ์ฒ˜๋Š” ๋ฐ˜๋“œ์‹œ [ํŽ˜์ด์ง€ X] ํ˜•ํƒœ๋กœ ํ‘œ์‹œํ•˜์„ธ์š” (์˜ˆ: [ํŽ˜์ด์ง€ 3])
- ์—ฌ๋Ÿฌ ํŽ˜์ด์ง€์—์„œ ์ •๋ณด๋ฅผ ์ฐพ์€ ๊ฒฝ์šฐ [ํŽ˜์ด์ง€ 3, 5, 12] ํ˜•ํƒœ๋กœ ํ‘œ์‹œํ•˜์„ธ์š”
- ์ •๋ง๋กœ ๋ฌธ์„œ์— ์—†๋Š” ๊ฒฝ์šฐ์—๋งŒ "๋ฌธ์„œ์—์„œ ๊ด€๋ จ ์ •๋ณด๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค"๋ผ๊ณ  ํ•˜์„ธ์š”
- ๋งˆํฌ๋‹ค์šด ํ˜•์‹์œผ๋กœ๋งŒ ๋‹ต๋ณ€ํ•˜๊ณ , HTML ํƒœ๊ทธ๋Š” ์ ˆ๋Œ€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”"""

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    request_body = {
        "model": "grok-3",
        "messages": conversation,
        "temperature": 0.1,
        "max_tokens": 2000,
        "stream": False
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    try:
        response = requests.post(
            f"{GROK_API_BASE}/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=30
        )

        if response.status_code == 200:
            return response.json()["choices"][0]["message"]["content"]

        # Error path: prefer the API's own message, fall back to the raw body.
        try:
            error_detail = response.json().get('error', {}).get('message', '')
        except Exception:
            error_detail = response.text

        return f"โŒ API ์˜ค๋ฅ˜ (์ฝ”๋“œ: {response.status_code})\n\n{error_detail}"

    except Exception as e:
        return f"โŒ ์˜ค๋ฅ˜: {str(e)}"
723
+
724
+
725
def highlight_text_in_pdf(pdf_bytes: bytes, highlight_info: List[Dict]) -> bytes:
    """Return a copy of the PDF with the requested text highlighted in yellow.

    Args:
        pdf_bytes: Raw bytes of the source PDF.
        highlight_info: Items shaped like ``{"text": str, "page": int}``
            with 1-based page numbers. Items with a missing or empty text,
            a non-integer page, or a page outside the document are skipped.

    Returns:
        The bytes of the annotated PDF.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")

    # Highlight colour is fixed to yellow (RGB on a 0-1 scale).
    yellow_color = [1.0, 1.0, 0.0]

    # try/finally so the document is closed even if annotating fails.
    try:
        for item in highlight_info:
            search_text = item.get('text')
            if not search_text or not isinstance(search_text, str):
                continue

            # The page may come from LLM-produced JSON, so it may be a
            # string or missing; coerce it and skip what cannot be used.
            try:
                page_index = int(item.get('page')) - 1
            except (TypeError, ValueError):
                continue

            # Reject negatives explicitly: a negative index would otherwise
            # select a page counted from the end of the document.
            if not 0 <= page_index < len(doc):
                continue

            page = doc[page_index]

            # Try the text as-is, without spaces and without commas.
            # dict.fromkeys drops identical variants so a single hit is not
            # covered by several stacked annotations.
            text_variations = dict.fromkeys((
                search_text,
                search_text.replace(' ', ''),
                search_text.replace(',', ''),
            ))

            for text_var in text_variations:
                if not text_var:
                    continue
                for inst in page.search_for(text_var):
                    highlight = page.add_highlight_annot(inst)
                    highlight.set_colors(stroke=yellow_color)
                    highlight.update()

        return doc.tobytes()
    finally:
        doc.close()
759
+
760
+
761
+ # ==================== Highlights based on the Grok extraction result ====================
762
def extract_highlights_from_grok(grok_result: Dict) -> List[Dict]:
    """Convert Grok's single selected passage into highlight instructions.

    Args:
        grok_result: Parsed reply of the verification call; expected keys
            are ``selected_text`` and ``page``. A dict containing ``error``
            means the call failed.

    Returns:
        ``[{"text": str, "page": int}]`` for a usable selection, otherwise
        ``[]``. A selection is usable when the text is a non-blank string
        of at most 150 characters and the page is (or converts to) an
        integer >= 1. A missing page defaults to 1.
    """
    # The result comes from model-generated JSON; validate its shape
    # instead of trusting it.
    if not isinstance(grok_result, dict) or "error" in grok_result:
        return []

    selected_text = grok_result.get("selected_text", "")
    if not isinstance(selected_text, str) or not selected_text.strip():
        return []
    if len(selected_text) > 150:
        return []

    # Models often return the page as a string ("3"); normalise it so the
    # page arithmetic done by the highlighter cannot fail.
    try:
        page = int(grok_result.get("page", 1))
    except (TypeError, ValueError, OverflowError):
        return []
    if page < 1:
        return []

    return [{'text': selected_text, 'page': page}]
780
+
781
+
782
def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoom_level: float = 2.0):
    """Render the highlighted PDF as an HTML string of inline PNG pages.

    Args:
        pdf_bytes: Raw bytes of the source PDF.
        highlight_info: Highlight items (``{"text", "page"}``) applied
            before rendering; their pages get a yellow page header.
        zoom_level: Scale factor for rasterising each page. It also sets
            the displayed image width: 2.0 maps to 100%, 1.0 to 50%.

    Returns:
        HTML markup: one ``pdf-container`` div holding, per page, a header
        bar and a base64-embedded PNG.
    """
    highlighted_pdf = highlight_text_in_pdf(pdf_bytes, highlight_info)
    highlighted_pages = {h['page'] for h in highlight_info}

    # Same for every page, so computed once outside the loop.
    zoom_percentage = int(zoom_level * 50)  # 2.0 = 100%, 1.0 = 50%

    doc = fitz.open(stream=highlighted_pdf, filetype="pdf")

    # Collect fragments and join once: the base64 images are large, and
    # repeated string concatenation would copy them over and over.
    parts = ['<div class="pdf-container">']

    # try/finally so the document is closed even if rendering fails.
    try:
        for page_num in range(len(doc)):
            page = doc[page_num]
            page_number = page_num + 1

            pix = page.get_pixmap(matrix=fitz.Matrix(zoom_level, zoom_level))
            img_data = pix.tobytes("png")
            img_base64 = base64.b64encode(img_data).decode()

            parts.append('<div style="margin-bottom: 2rem; position: relative;">')

            if page_number in highlighted_pages:
                # Page with a highlight: yellow header
                parts.append(f'<div style="background: #FEF08A; color: #854D0E; padding: 0.5rem; margin-bottom: 0.5rem; border-radius: 0.3rem; font-weight: bold; border-left: 4px solid #EAB308;">โญ ํŽ˜์ด์ง€ {page_number}</div>')
            else:
                # Ordinary page: blue header
                parts.append(f'<div style="background: #667eea; color: white; padding: 0.5rem; margin-bottom: 0.5rem; border-radius: 0.3rem; font-weight: bold;">๐Ÿ“„ ํŽ˜์ด์ง€ {page_number}</div>')

            # The width percentage is what applies the zoom on screen.
            parts.append(f'<img src="data:image/png;base64,{img_base64}" style="width: {zoom_percentage}%; border: 1px solid #E2E8F0; border-radius: 0.3rem; box-shadow: 0 1px 3px rgba(0,0,0,0.1); display: block; margin: 0 auto;" />')
            parts.append('</div>')
    finally:
        doc.close()

    parts.append('</div>')
    return ''.join(parts)
821
+
822
+
823
def main():
    """Render the PROBIN Streamlit page for one script run.

    After ``init_session()`` this draws, in order:

    * the landing header (only while no document has been processed);
    * the sidebar: PDF uploader, the "process document" button, and, once a
      document is processed, its file name / page count and a reset button;
    * either the usage guide, or the two-column analysis view (PDF viewer
      with zoom controls in the first column, chat in the second).

    Answering a question takes two script runs. The first run appends the
    user message to ``st.session_state.chat_history``, stores the text in
    ``st.session_state.processing_query`` and reruns so the question is shown
    right away. The second run clears that flag, calls ``hybrid_search``,
    ``grok_verify_and_extract`` and ``generate_answer``, stores the resulting
    highlights and the assistant message, and reruns again.

    Text taken from the document or from the Grok result is HTML-escaped
    before it is placed into markup rendered with ``unsafe_allow_html=True``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import html

    def _snippet(text, limit=150):
        # Truncate first (same 150-character rule as before), then escape so
        # '<', '>' and '&' in document/model text are shown literally instead
        # of being parsed as HTML.
        shortened = text[:limit] + ('...' if len(text) > limit else '')
        return html.escape(shortened)

    init_session()

    # The landing header is shown only before a document has been processed.
    if not st.session_state.processed:
        st.markdown("""
        <div class="probin-header">
            <div class="probin-title">๐Ÿ“„ PROBIN</div>
            <div class="probin-subtitle">RFx ๋ฌธ์„œ ๋ถ„์„ AI - ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ + Grok ์ ๊ฒ€</div>
        </div>
        """, unsafe_allow_html=True)

    # ========== Sidebar ==========
    with st.sidebar:
        st.title("๐Ÿ”ฎ PROBIN")

        uploaded_file = st.file_uploader(
            "๋“œ๋ž˜๊ทธํ•˜์—ฌ ํŒŒ์ผ์„ ์—…๋กœ๋“œ ๋˜๋Š” ํด๋ฆญํ•˜์—ฌ ์„ ํƒํ•˜์„ธ์š”.",
            type=['pdf'],
            label_visibility="visible",
            help="PDF ํŒŒ์ผ๋งŒ ์—…๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค (์ตœ๋Œ€ 200MB)"
        )

        if uploaded_file:
            if st.button("๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์‹œ์ž‘", type="primary", use_container_width=True):
                # Without an API key nothing downstream can work; halt this run.
                if not GROK_API_KEY:
                    st.error("โš ๏ธ GROK_API_KEY๊ฐ€ .env ํŒŒ์ผ์— ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค!")
                    st.stop()

                # Drop state left over from a previously processed document.
                st.session_state.vector_db = None
                st.session_state.embedder = None
                st.session_state.chat_history = []
                st.session_state.current_highlights = []

                with st.spinner("๐Ÿ“„ ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์ค‘..."):
                    try:
                        chunks, metadata_list, pdf_bytes, pages_text = extract_text_from_pdf(uploaded_file)

                        with st.spinner("๐Ÿ”ง ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ์ƒ์„ฑ ์ค‘..."):
                            collection, embedder = create_vector_db(chunks, metadata_list)

                        st.session_state.vector_db = collection
                        st.session_state.embedder = embedder
                        st.session_state.pdf_bytes = pdf_bytes
                        st.session_state.pdf_pages_text = pages_text
                        st.session_state.processed = True
                        st.session_state.doc_metadata = {
                            "filename": uploaded_file.name,
                            "chunks": len(chunks),
                            # Number of distinct pages that produced at least one chunk.
                            "pages": len(set(m['page'] for m in metadata_list))
                        }

                        st.success("โœ… ๋ฌธ์„œ ์ฒ˜๋ฆฌ ์™„๋ฃŒ!")
                        st.rerun()

                    except Exception as e:
                        # UI boundary: report the failure instead of crashing the app.
                        st.error(f"์˜ค๋ฅ˜: {e}")

        # Document info (the chunk count is intentionally not displayed).
        if st.session_state.processed:
            st.markdown("#### ๐Ÿ“Š ๋ฌธ์„œ ์ •๋ณด")
            st.info(f"๐Ÿ“„ **{st.session_state.doc_metadata['filename']}**")
            st.info(f"๐Ÿ“‘ ํŽ˜์ด์ง€: {st.session_state.doc_metadata['pages']}")

            st.divider()

            # Reset button: clear all per-document state and start over.
            if st.button("๐Ÿ”„ ์ƒˆ ๋ฌธ์„œ ์—…๋กœ๋“œ", use_container_width=True):
                st.session_state.processed = False
                st.session_state.vector_db = None
                st.session_state.embedder = None
                st.session_state.chat_history = []
                st.session_state.current_highlights = []
                st.session_state.pdf_bytes = None
                st.session_state.pdf_pages_text = {}
                st.session_state.zoom_level = 2.0
                st.rerun()

    # ===== No document processed yet: show the usage guide =====
    if not st.session_state.processed:
        st.markdown("""
        <div class="usage-guide">
            <h2 style="text-align: center; color: #2D3748; margin-bottom: 1.5rem;">๐Ÿ“– ์‚ฌ์šฉ ๋ฐฉ๋ฒ•</h2>
            <div class="guide-step">
                <div class="step-number">1</div>
                <div>์˜ค๋ฅธ์ชฝ์— PDF ๋ฌธ์„œ๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”.</div>
            </div>
            <div class="guide-step">
                <div class="step-number">2</div>
                <div>๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋  ๋•Œ๊นŒ์ง€ 30์ดˆ ์ •๋„ ๊ธฐ๋‹ค๋ฆฝ๋‹ˆ๋‹ค.</div>
            </div>
            <div class="guide-step">
                <div class="step-number">3</div>
                <div>์™ผ์ชฝ์—์„œ PDF๋ฅผ ํ™•์ธํ•˜๊ณ , ์˜ค๋ฅธ์ชฝ ์ฑ„ํŒ…์ฐฝ์—์„œ ์งˆ๋ฌธํ•˜์„ธ์š”.</div>
            </div>
            <div class="guide-step">
                <div class="step-number">4</div>
                <div>AI๊ฐ€ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰์œผ๋กœ 3๊ฐœ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ๊ณ , Grok์ด ์ตœ์ข… 1๊ฐœ๋งŒ ์„ ํƒํ•ด ํ•˜์ด๋ผ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.</div>
            </div>
        </div>
        """, unsafe_allow_html=True)

    # A document has been processed: show the analysis screen.
    else:
        # Two-column layout: viewer first, chat second.
        col1, col2 = st.columns([1, 1])

        with col1:
            # Title and zoom controls side by side on one row.
            header_cols = st.columns([7, 1, 1.5, 1])
            with header_cols[0]:
                st.markdown("### ๐Ÿ“„ ๋ฌธ์„œ ๋ทฐ์–ด")
            with header_cols[1]:
                if st.button("โž–", key="zoom_out", help="์ถ•์†Œ", use_container_width=True):
                    if st.session_state.zoom_level > 0.5:
                        st.session_state.zoom_level -= 0.25
                        st.rerun()
            with header_cols[2]:
                # zoom_level * 50 is the percentage shown to the user (2.0 -> 100%).
                st.markdown(f"<div style='text-align: center; padding-top: 0.5rem; font-weight: bold;'>{int(st.session_state.zoom_level * 50)}%</div>", unsafe_allow_html=True)
            with header_cols[3]:
                if st.button("โž•", key="zoom_in", help="ํ™•๋Œ€", use_container_width=True):
                    if st.session_state.zoom_level < 4.0:
                        st.session_state.zoom_level += 0.25
                        st.rerun()

            if st.session_state.pdf_bytes:
                pdf_html = render_pdf_with_highlights(
                    st.session_state.pdf_bytes,
                    st.session_state.current_highlights,
                    st.session_state.zoom_level
                )
                st.markdown(pdf_html, unsafe_allow_html=True)

        with col2:
            st.markdown("### ๐Ÿ’ฌ AI ์ฑ—๋ด‡")

            # Fixed-height container that holds the chat history.
            chat_container = st.container(height=650)

            with chat_container:
                for msg in st.session_state.chat_history:
                    with st.chat_message(msg["role"]):
                        st.markdown(msg["content"])

                        if msg["role"] == "assistant" and "sources" in msg:
                            with st.expander("๐Ÿ“š ์ฐธ์กฐ ๋ฌธ์„œ"):
                                for i, (doc, meta) in enumerate(zip(
                                    msg["sources"]["docs"],
                                    msg["sources"]["metas"]
                                ), 1):
                                    # Short, escaped preview of the retrieved chunk.
                                    clean_text = _snippet(doc)
                                    page_label = html.escape(str(meta['page']))

                                    st.markdown(f"""
                                    <div class="source-box">
                                        <div class="source-title">
                                            <span class="page-indicator">ํŽ˜์ด์ง€ {page_label}</span>
                                        </div>
                                        <div style="font-size: 0.9rem; color: #475569; margin-top: 0.3rem;">
                                            {clean_text}
                                        </div>
                                    </div>
                                    """, unsafe_allow_html=True)

                            # Grok verification result (the single final pick).
                            if "grok_verified" in msg["sources"]:
                                with st.expander("๐Ÿ” Grok AI ์ตœ์ข… ์„ ํƒ"):
                                    grok_data = msg["sources"]["grok_verified"]
                                    if isinstance(grok_data, dict) and "selected_text" in grok_data:
                                        selected_text = grok_data.get('selected_text', '์„ ํƒ๋œ ์ •๋ณด ์—†์Œ')
                                        # Model output is untrusted text too: truncate and escape.
                                        display_text = _snippet(selected_text)
                                        grok_page = html.escape(str(grok_data.get('page', '?')))

                                        st.markdown(f"""
                                        <div class="highlight-indicator">
                                            <strong>โœ… ํŽ˜์ด์ง€ {grok_page}</strong><br>
                                            <div style="margin-top: 0.5rem;">{display_text}</div>
                                        </div>
                                        """, unsafe_allow_html=True)

            # Chat input, placed directly below the history container.
            prompt = st.chat_input("๐Ÿ’ฌ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”...", key="chat_input")

            # Step 1: record the question and rerun immediately so it shows up
            # inside the chat box before the (slow) answer is generated.
            if prompt:
                st.session_state.chat_history.append({"role": "user", "content": prompt})
                st.session_state.processing_query = prompt
                st.rerun()

            # Step 2: a pending query exists, so generate the answer.
            if st.session_state.processing_query:
                query = st.session_state.processing_query
                # Clear the flag first so a rerun never answers the same query twice.
                st.session_state.processing_query = None

                with st.spinner("๐Ÿ” ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ ์ค‘..."):
                    try:
                        # 1. Hybrid search (vector + keyword), top 3 results.
                        search_results = hybrid_search(
                            query,
                            st.session_state.vector_db,
                            st.session_state.embedder,
                            top_k=3
                        )

                        # 2. Verify and extract with the Grok API.
                        with st.spinner("๐Ÿค– Grok AI ๊ฒ€์ฆ ์ค‘..."):
                            grok_result = grok_verify_and_extract(
                                query,
                                search_results,
                                GROK_API_KEY
                            )

                        # 3. Generate the answer text.
                        answer = generate_answer(
                            query,
                            search_results,
                            GROK_API_KEY
                        )

                        # 4. Turn the Grok extraction into viewer highlights.
                        highlights = extract_highlights_from_grok(grok_result)
                        st.session_state.current_highlights = highlights

                        # 5. Store the answer and its sources in the chat history.
                        chat_data = {
                            "role": "assistant",
                            "content": answer,
                            "sources": {
                                "docs": search_results['documents'][0],
                                "metas": search_results['metadatas'][0],
                                "scores": search_results.get('scores', []),
                                "keywords": search_results.get('keywords', []),
                                "grok_verified": grok_result
                            }
                        }
                        st.session_state.chat_history.append(chat_data)
                        st.rerun()

                    except Exception as e:
                        # UI boundary: surface the failure as an assistant message.
                        error_msg = f"โŒ ์˜ค๋ฅ˜: {e}"
                        st.session_state.chat_history.append({
                            "role": "assistant",
                            "content": error_msg
                        })
                        st.rerun()
1073
+
1074
+
1075
# Script entry point: call main() only when this file is executed directly.
+ if __name__ == "__main__":
1076
+ main()