dohyune committed on
Commit
f2fb7e4
·
verified ·
1 Parent(s): d08d599

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -51
app.py CHANGED
@@ -1,8 +1,9 @@
1
  """
2
- PROBIN - RFx 문서 분석 AI (하이브리드 검색 + Grok 점검 버전)
3
 
4
  """
5
  import streamlit as st
 
6
  import fitz # PyMuPDF
7
  import chromadb
8
  from sentence_transformers import SentenceTransformer, util
@@ -17,6 +18,15 @@ import base64
17
  from dotenv import load_dotenv
18
  import json
19
 
 
 
 
 
 
 
 
 
 
20
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
21
  load_dotenv()
22
 
@@ -36,7 +46,7 @@ class HighlightConfig:
36
 
37
  # Page config
38
  st.set_page_config(
39
- page_title="PROBIN",
40
  page_icon="🔮",
41
  layout="wide",
42
  initial_sidebar_state="expanded"
@@ -53,9 +63,35 @@ st.markdown("""
53
  width: 290px !important;
54
  }
55
 
 
56
  [data-testid="stSidebar"] h1 {
57
  color: white !important;
58
- text-shadow: 2px 2px 15px rgba(0,0,0,0.4);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
 
61
  /* 파일 업로더 배경 투명하게 */
@@ -144,11 +180,17 @@ st.markdown("""
144
  }
145
 
146
  /* 헤더 스타일 - 박스 제거, 텍스트 그림자만 */
147
- .probin-header {
148
  padding: 1.5rem 2rem;
149
  margin-bottom: 2rem;
150
  }
151
- .probin-title {
 
 
 
 
 
 
152
  font-size: 2.5rem;
153
  font-weight: bold;
154
  color: white;
@@ -157,7 +199,7 @@ st.markdown("""
157
  text-shadow: 2px 2px 8px rgba(0, 0, 0, 0.4),
158
  0 0 20px rgba(102, 126, 234, 0.4);
159
  }
160
- .probin-subtitle {
161
  font-size: 1rem;
162
  color: rgba(255, 255, 255, 0.9);
163
  text-align: center;
@@ -258,6 +300,7 @@ st.markdown("""
258
  display: flex;
259
  align-items: center;
260
  margin: 1.5rem 0;
 
261
  font-size: 1.1rem;
262
  color: #2D3748;
263
  }
@@ -284,6 +327,70 @@ st.markdown("""
284
  align-items: center;
285
  margin-bottom: 1rem;
286
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  </style>
288
  """, unsafe_allow_html=True)
289
 
@@ -312,6 +419,8 @@ def init_session():
312
  st.session_state.highlight_config = HighlightConfig()
313
  if 'processing_query' not in st.session_state:
314
  st.session_state.processing_query = None
 
 
315
 
316
 
317
  def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]:
@@ -787,7 +896,7 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
787
 
788
  highlighted_pages = set(h['page'] for h in highlight_info)
789
 
790
- pdf_html = '<div class="pdf-container">'
791
 
792
  for page_num in range(len(doc)):
793
  page = doc[page_num]
@@ -800,7 +909,9 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
800
  # ์‹ค์ œ ์ด๋ฏธ์ง€ ํฌ๊ธฐ ๊ณ„์‚ฐ (zoom_level์— ๋”ฐ๋ผ)
801
  zoom_percentage = int(zoom_level * 50) # 2.0 = 100%, 1.0 = 50%
802
 
803
- pdf_html += '<div style="margin-bottom: 2rem; position: relative;">'
 
 
804
 
805
  # ํ•˜์ด๋ผ์ดํŠธ ์—ฌ๋ถ€์— ๋”ฐ๋ผ ํŽ˜์ด์ง€ ํ—ค๋” ์Šคํƒ€์ผ ๋ณ€๊ฒฝ
806
  if (page_num + 1) in highlighted_pages:
@@ -815,6 +926,7 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
815
  pdf_html += '</div>'
816
 
817
  pdf_html += '</div>'
 
818
  doc.close()
819
 
820
  return pdf_html
@@ -826,16 +938,17 @@ def main():
826
 
827
  # Header 문서 처리 전에만 보임
828
  if not st.session_state.processed:
829
- st.markdown("""
830
- <div class="probin-header">
831
- <div class="probin-title">📄 PROBIN</div>
832
- <div class="probin-subtitle">RFx ๋ฌธ์„œ ๋ถ„์„ AI - ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ + Grok ์ ๊ฒ€</div>
833
  </div>
834
  """, unsafe_allow_html=True)
835
 
836
  # ========== 사이드바 ==========
837
  with st.sidebar:
838
- st.title("🔮 PROBIN")
 
839
 
840
  uploaded_file = st.file_uploader(
841
  "๋“œ๋ž˜๊ทธํ•˜์—ฌ ํŒŒ์ผ์„ ์—…๋กœ๋“œ ๋˜๋Š” ํด๋ฆญํ•˜์—ฌ ์„ ํƒํ•˜์„ธ์š”.",
@@ -860,7 +973,7 @@ def main():
860
  try:
861
  chunks, metadata_list, pdf_bytes, pages_text = extract_text_from_pdf(uploaded_file)
862
 
863
- with st.spinner("๐Ÿ”ง ๋ฒกํ„ฐ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ์ƒ์„ฑ ์ค‘..."):
864
  collection, embedder = create_vector_db(chunks, metadata_list)
865
 
866
  st.session_state.vector_db = collection
@@ -889,19 +1002,19 @@ def main():
889
  # ์ฒญํฌ ํ‘œ์‹œ ์ œ๊ฑฐ๋จ
890
  # ============================================================
891
 
892
- st.divider()
893
 
894
  # ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ
895
- if st.button("๐Ÿ”„ ์ƒˆ ๋ฌธ์„œ ์—…๋กœ๋“œ", use_container_width=True):
896
- st.session_state.processed = False
897
- st.session_state.vector_db = None
898
- st.session_state.embedder = None
899
- st.session_state.chat_history = []
900
- st.session_state.current_highlights = []
901
- st.session_state.pdf_bytes = None
902
- st.session_state.pdf_pages_text = {}
903
- st.session_state.zoom_level = 2.0
904
- st.rerun()
905
 
906
  # ===== ์•„์ง ๋ฌธ์„œ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ
907
  if not st.session_state.processed:
@@ -910,19 +1023,19 @@ def main():
910
  <h2 style="text-align: center; color: #2D3748; margin-bottom: 1.5rem;">๐Ÿ“– ์‚ฌ์šฉ ๋ฐฉ๋ฒ•</h2>
911
  <div class="guide-step">
912
  <div class="step-number">1</div>
913
- <div>์˜ค๋ฅธ์ชฝ์— PDF ๋ฌธ์„œ๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”.</div>
914
  </div>
915
  <div class="guide-step">
916
  <div class="step-number">2</div>
917
- <div>๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋  ๋•Œ๊นŒ์ง€ 30์ดˆ ์ •๋„ ๊ธฐ๋‹ค๋ฆฝ๋‹ˆ๋‹ค.</div>
918
  </div>
919
  <div class="guide-step">
920
  <div class="step-number">3</div>
921
- <div>์™ผ์ชฝ์—์„œ PDF๋ฅผ ํ™•์ธํ•˜๊ณ , ์˜ค๋ฅธ์ชฝ ์ฑ„ํŒ…์ฐฝ์—์„œ ์งˆ๋ฌธํ•˜์„ธ์š”.</div>
922
  </div>
923
  <div class="guide-step">
924
  <div class="step-number">4</div>
925
- <div>AI๊ฐ€ ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰์œผ๋กœ 3๊ฐœ ๊ฒฐ๊ณผ๋ฅผ ์ฐพ๊ณ , Grok์ด ์ตœ์ข… 1๊ฐœ๋งŒ ์„ ํƒํ•ด ํ•˜์ด๋ผ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.</div>
926
  </div>
927
  </div>
928
  """, unsafe_allow_html=True)
@@ -957,33 +1070,64 @@ def main():
957
  st.session_state.zoom_level
958
  )
959
  st.markdown(pdf_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
960
 
961
  with col2:
962
- st.markdown("### ๐Ÿ’ฌ AI ์ฑ—๋ด‡")
963
 
964
  # 채팅 히스토리를 담을 컨테이너
965
  chat_container = st.container(height=650)
966
 
967
  with chat_container:
968
- for msg in st.session_state.chat_history:
969
  with st.chat_message(msg["role"]):
970
  st.markdown(msg["content"])
971
 
972
  if msg["role"] == "assistant" and "sources" in msg:
973
  with st.expander("๐Ÿ“š ์ฐธ์กฐ ๋ฌธ์„œ"):
974
- for i, (doc, meta) in enumerate(zip(
975
- msg["sources"]["docs"],
976
- msg["sources"]["metas"]
977
- ), 1):
978
  # ํ…์ŠคํŠธ๋ฅผ 150์ž๋กœ ์ œํ•œํ•˜๊ณ  ๊ฐ„๊ฒฐํ•˜๊ฒŒ ํ‘œ์‹œ
979
  clean_text = doc[:150] + ('...' if len(doc) > 150 else '')
980
 
 
 
 
 
 
 
 
 
 
 
 
981
  st.markdown(f"""
982
- <div class="source-box">
983
- <div class="source-title">
984
- <span class="page-indicator">ํŽ˜์ด์ง€ {meta['page']}</span>
985
- </div>
986
- <div style="font-size: 0.9rem; color: #475569; margin-top: 0.3rem;">
987
  {clean_text}
988
  </div>
989
  </div>
@@ -998,10 +1142,20 @@ def main():
998
  # ํ…์ŠคํŠธ๋ฅผ 150์ž๋กœ ์ œํ•œ
999
  display_text = selected_text[:150] + ('...' if len(selected_text) > 150 else '')
1000
 
 
 
 
 
 
 
 
 
 
 
 
1001
  st.markdown(f"""
1002
- <div class="highlight-indicator">
1003
- <strong>โœ… ํŽ˜์ด์ง€ {grok_data.get('page', '?')}</strong><br>
1004
- <div style="margin-top: 0.5rem;">{display_text}</div>
1005
  </div>
1006
  """, unsafe_allow_html=True)
1007
 
@@ -1019,7 +1173,7 @@ def main():
1019
  query = st.session_state.processing_query
1020
  st.session_state.processing_query = None # ํ”Œ๋ž˜๊ทธ ๋ฆฌ์…‹
1021
 
1022
- with st.spinner("๐Ÿ” ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ ์ค‘..."):
1023
  try:
1024
  # 1. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ (๋ฒกํ„ฐ + ํ‚ค์›Œ๋“œ) - ์ƒ์œ„ 3๊ฐœ
1025
  search_results = hybrid_search(
@@ -1030,12 +1184,11 @@ def main():
1030
  )
1031
 
1032
  # 2. Grok API๋กœ ๊ฒ€์ฆ ๋ฐ ์ถ”์ถœ
1033
- with st.spinner("๐Ÿค– Grok AI ๊ฒ€์ฆ ์ค‘..."):
1034
- grok_result = grok_verify_and_extract(
1035
- query,
1036
- search_results,
1037
- GROK_API_KEY
1038
- )
1039
 
1040
  # 3. ๋‹ต๋ณ€ ์ƒ์„ฑ
1041
  answer = generate_answer(
@@ -1048,7 +1201,11 @@ def main():
1048
  highlights = extract_highlights_from_grok(grok_result)
1049
  st.session_state.current_highlights = highlights
1050
 
1051
- # 5. ์ฑ„ํŒ… ํžˆ์Šคํ† ๋ฆฌ์— ๋‹ต๋ณ€ ์ €์žฅ
 
 
 
 
1052
  chat_data = {
1053
  "role": "assistant",
1054
  "content": answer,
 
1
  """
2
+ PLOBIN - 문서 속 답을 찾아주는 AI 비서
3
 
4
  """
5
  import streamlit as st
6
+ import streamlit.components.v1 as components
7
  import fitz # PyMuPDF
8
  import chromadb
9
  from sentence_transformers import SentenceTransformer, util
 
18
  from dotenv import load_dotenv
19
  import json
20
 
21
+ import base64
22
+
23
+ def get_image_base64(image_path):
24
+ with open(image_path, "rb") as img_file:
25
+ return base64.b64encode(img_file.read()).decode()
26
+
27
+ # 파일 상단에서 한 번만 로드
28
+ plobin_logo_base64 = get_image_base64("img/plobin.png")
29
+
30
  # ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
31
  load_dotenv()
32
 
 
46
 
47
  # Page config
48
  st.set_page_config(
49
+ page_title="PLOBIN",
50
  page_icon="🔮",
51
  layout="wide",
52
  initial_sidebar_state="expanded"
 
63
  width: 290px !important;
64
  }
65
 
66
+ /* 사이드바 타이틀 빛나는 효과 */
67
  [data-testid="stSidebar"] h1 {
68
  color: white !important;
69
+ font-weight: 900 !important;
70
+ text-shadow:
71
+ 0 0 30px rgba(255,255,255,0.6),
72
+ 0 0 50px rgba(102,126,234,0.4),
73
+ 3px 3px 40px rgba(0,0,0,0.4);
74
+ animation: sidebarTitlePulse 4s ease-in-out infinite;
75
+ letter-spacing: 2px;
76
+ }
77
+
78
+ /* 사이드바 타이틀 애니메이션 */
79
+ @keyframes sidebarTitlePulse {
80
+ 0%, 100% {
81
+ transform: scale(1);
82
+ text-shadow:
83
+ 0 0 30px rgba(255,255,255,0.6),
84
+ 0 0 50px rgba(102,126,234,0.4),
85
+ 3px 3px 40px rgba(0,0,0,0.4);
86
+ }
87
+ 50% {
88
+ transform: scale(1.03);
89
+ text-shadow:
90
+ 0 0 40px rgba(255,255,255,0.8),
91
+ 0 0 70px rgba(102,126,234,0.6),
92
+ 0 0 100px rgba(118,75,162,0.4),
93
+ 3px 3px 40px rgba(0,0,0,0.4);
94
+ }
95
  }
96
 
97
  /* 파일 업로더 배경 투명하게 */
 
180
  }
181
 
182
  /* 헤더 스타일 - 박스 제거, 텍스트 그림자만 */
183
+ .plobin-header {
184
  padding: 1.5rem 2rem;
185
  margin-bottom: 2rem;
186
  }
187
+ .plobin-logo {
188
+ display: block;
189
+ margin: 0 auto;
190
+ height: 60px; /* ์›ํ•˜๋Š” ํฌ๊ธฐ๋กœ ์กฐ์ • */
191
+ }
192
+
193
+ .plobin-title {
194
  font-size: 2.5rem;
195
  font-weight: bold;
196
  color: white;
 
199
  text-shadow: 2px 2px 8px rgba(0, 0, 0, 0.4),
200
  0 0 20px rgba(102, 126, 234, 0.4);
201
  }
202
+ .plobin-subtitle {
203
  font-size: 1rem;
204
  color: rgba(255, 255, 255, 0.9);
205
  text-align: center;
 
300
  display: flex;
301
  align-items: center;
302
  margin: 1.5rem 0;
303
+ margin-left: 3.5rem;
304
  font-size: 1.1rem;
305
  color: #2D3748;
306
  }
 
327
  align-items: center;
328
  margin-bottom: 1rem;
329
  }
330
+
331
+ /* 페이지 선택 애니메이션 */
332
+ @keyframes pulse {
333
+ 0%, 100% {
334
+ box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
335
+ }
336
+ 50% {
337
+ box-shadow: 0 0 20px 10px rgba(16, 185, 129, 0);
338
+ }
339
+ }
340
+
341
+ /* ์ฑ„ํŒ… ํƒ€์ดํ‹€ ์Šคํƒ€์ผ (์• ๋‹ˆ๋ฉ”์ด์…˜ ์ œ๊ฑฐ) */
342
+ .chat-title {
343
+ color: white !important;
344
+ font-weight: 900 !important;
345
+ font-size: 1.75rem !important;
346
+ margin-bottom: 1rem !important;
347
+ text-shadow:
348
+ 0 0 30px rgba(255,255,255,0.6),
349
+ 0 0 50px rgba(102,126,234,0.4),
350
+ 3px 3px 40px rgba(0,0,0,0.4);
351
+ letter-spacing: 2px;
352
+ }
353
+
354
+ /* ์ฑ„ํŒ… ๋‚ด ํŽ˜์ด์ง€ ๋ฒˆํ˜ธ ๋ฒ„ํŠผ ์Šคํƒ€์ผ - ํด๋ฆญ ๊ฐ€๋Šฅํ•œ ๋ฐ•์Šค์ฒ˜๋Ÿผ */
355
+ [data-testid="column"] button[kind="secondary"] {
356
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
357
+ color: white !important;
358
+ border: none !important;
359
+ border-radius: 0.5rem !important;
360
+ padding: 0.6rem 1rem !important;
361
+ font-weight: bold !important;
362
+ font-size: 0.95rem !important;
363
+ text-align: left !important;
364
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
365
+ transition: all 0.2s ease !important;
366
+ cursor: pointer !important;
367
+ }
368
+
369
+ [data-testid="column"] button[kind="secondary"]:hover {
370
+ transform: translateY(-2px) !important;
371
+ box-shadow: 0 4px 8px rgba(102, 126, 234, 0.3) !important;
372
+ background: linear-gradient(135deg, #7c8ff5 0%, #8a5db8 100%) !important;
373
+ }
374
+
375
+ [data-testid="column"] button[kind="primary"] {
376
+ background: linear-gradient(135deg, #FEF08A 0%, #FDE047 100%) !important;
377
+ color: #854D0E !important;
378
+ border: 2px solid #EAB308 !important;
379
+ border-radius: 0.5rem !important;
380
+ padding: 0.6rem 1rem !important;
381
+ font-weight: bold !important;
382
+ font-size: 0.95rem !important;
383
+ text-align: left !important;
384
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
385
+ transition: all 0.2s ease !important;
386
+ cursor: pointer !important;
387
+ }
388
+
389
+ [data-testid="column"] button[kind="primary"]:hover {
390
+ transform: translateY(-2px) !important;
391
+ box-shadow: 0 4px 8px rgba(234, 179, 8, 0.3) !important;
392
+ background: linear-gradient(135deg, #FDE047 0%, #FACC15 100%) !important;
393
+ }
394
  </style>
395
  """, unsafe_allow_html=True)
396
 
 
419
  st.session_state.highlight_config = HighlightConfig()
420
  if 'processing_query' not in st.session_state:
421
  st.session_state.processing_query = None
422
+ if 'scroll_to_page' not in st.session_state:
423
+ st.session_state.scroll_to_page = None
424
 
425
 
426
  def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]:
 
896
 
897
  highlighted_pages = set(h['page'] for h in highlight_info)
898
 
899
+ pdf_html = '<div class="pdf-container" id="pdf-viewer-container">'
900
 
901
  for page_num in range(len(doc)):
902
  page = doc[page_num]
 
909
  # ์‹ค์ œ ์ด๋ฏธ์ง€ ํฌ๊ธฐ ๊ณ„์‚ฐ (zoom_level์— ๋”ฐ๋ผ)
910
  zoom_percentage = int(zoom_level * 50) # 2.0 = 100%, 1.0 = 50%
911
 
912
+ # 각 페이지에 고유 ID 부여
913
+ page_id = f'page-{page_num + 1}'
914
+ pdf_html += f'<div id="{page_id}" style="margin-bottom: 2rem; position: relative;">'
915
 
916
  # ํ•˜์ด๋ผ์ดํŠธ ์—ฌ๋ถ€์— ๋”ฐ๋ผ ํŽ˜์ด์ง€ ํ—ค๋” ์Šคํƒ€์ผ ๋ณ€๊ฒฝ
917
  if (page_num + 1) in highlighted_pages:
 
926
  pdf_html += '</div>'
927
 
928
  pdf_html += '</div>'
929
+
930
  doc.close()
931
 
932
  return pdf_html
 
938
 
939
  # Header 문서 처리 전에만 보임
940
  if not st.session_state.processed:
941
+ st.markdown(f"""
942
+ <div class="plobin-header">
943
+ <img src="data:image/png;base64,{plobin_logo_base64}" class="plobin-logo" alt="PLOBIN" style="height: 60px; margin-bottom: 10px;">
944
+ <div class="plobin-subtitle">문서 속 답을 찾아주는 AI 비서</div>
945
  </div>
946
  """, unsafe_allow_html=True)
947
 
948
  # ========== 사이드바 ==========
949
  with st.sidebar:
950
+ st.image("img/plobin.png", width=120) # ํ”ฝ์…€ ๊ฐ’์œผ๋กœ ์ง์ ‘ ์ง€์ •
951
+ # st.title("🔮 PLOBIN")
952
 
953
  uploaded_file = st.file_uploader(
954
  "๋“œ๋ž˜๊ทธํ•˜์—ฌ ํŒŒ์ผ์„ ์—…๋กœ๋“œ ๋˜๋Š” ํด๋ฆญํ•˜์—ฌ ์„ ํƒํ•˜์„ธ์š”.",
 
973
  try:
974
  chunks, metadata_list, pdf_bytes, pages_text = extract_text_from_pdf(uploaded_file)
975
 
976
+ with st.spinner("๐Ÿค– ๋ฌธ์„œ๋ฅผ AI๊ฐ€ ์ดํ•ดํ•  ์ˆ˜ ์žˆ๊ฒŒ ์ฒ˜๋ฆฌ ์ค‘.."):
977
  collection, embedder = create_vector_db(chunks, metadata_list)
978
 
979
  st.session_state.vector_db = collection
 
1002
  # ์ฒญํฌ ํ‘œ์‹œ ์ œ๊ฑฐ๋จ
1003
  # ============================================================
1004
 
1005
+ # st.divider()
1006
 
1007
  # ์ดˆ๊ธฐํ™” ๋ฒ„ํŠผ
1008
+ # if st.button("๐Ÿ”„ ์ƒˆ ๋ฌธ์„œ ์—…๋กœ๋“œ", use_container_width=True):
1009
+ # st.session_state.processed = False
1010
+ # st.session_state.vector_db = None
1011
+ # st.session_state.embedder = None
1012
+ # st.session_state.chat_history = []
1013
+ # st.session_state.current_highlights = []
1014
+ # st.session_state.pdf_bytes = None
1015
+ # st.session_state.pdf_pages_text = {}
1016
+ # st.session_state.zoom_level = 2.0
1017
+ # st.rerun()
1018
 
1019
  # ===== ์•„์ง ๋ฌธ์„œ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ
1020
  if not st.session_state.processed:
 
1023
  <h2 style="text-align: center; color: #2D3748; margin-bottom: 1.5rem;">๐Ÿ“– ์‚ฌ์šฉ ๋ฐฉ๋ฒ•</h2>
1024
  <div class="guide-step">
1025
  <div class="step-number">1</div>
1026
+ <div>๐Ÿ“ค PDF ํŒŒ์ผ์„ ์˜ฌ๋ ค์ฃผ์„ธ์š”</div>
1027
  </div>
1028
  <div class="guide-step">
1029
  <div class="step-number">2</div>
1030
+ <div>๋ฌธ์„œ ์ฒ˜๋ฆฌ๊ฐ€ ์™„๋ฃŒ๋  ๋•Œ๊นŒ์ง€ ์ž ์‹œ๋งŒ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”</div>
1031
  </div>
1032
  <div class="guide-step">
1033
  <div class="step-number">3</div>
1034
+ <div>๋ฌธ์„œ ๋‚ด ๊ถ๊ธˆํ•œ ๋‚ด์šฉ์„ ๋ฌผ์–ด๋ณด์„ธ์š”</div>
1035
  </div>
1036
  <div class="guide-step">
1037
  <div class="step-number">4</div>
1038
+ <div>AI๊ฐ€ ์ •ํ™•ํ•œ ๋‹ต๋ณ€๊ณผ ์ถœ์ฒ˜๋ฅผ ํ•จ๊ป˜ ์•Œ๋ ค๋“œ๋ ค์š”</div>
1039
  </div>
1040
  </div>
1041
  """, unsafe_allow_html=True)
 
1070
  st.session_state.zoom_level
1071
  )
1072
  st.markdown(pdf_html, unsafe_allow_html=True)
1073
+
1074
+ # ์Šคํฌ๋กค ๊ธฐ๋Šฅ - JavaScript๋กœ ๊ตฌํ˜„
1075
+ if st.session_state.scroll_to_page:
1076
+ scroll_js = f"""
1077
+ <script>
1078
+ // PDF ์ปจํ…Œ์ด๋„ˆ ์ฐพ๊ธฐ
1079
+ const container = parent.document.querySelector('.pdf-container');
1080
+ const targetPage = parent.document.getElementById('page-{st.session_state.scroll_to_page}');
1081
+
1082
+ if (container && targetPage) {{
1083
+ // ์ปจํ…Œ์ด๋„ˆ ๋‚ด์—์„œ ํƒ€๊ฒŸ ํŽ˜์ด์ง€์˜ ์œ„์น˜ ๊ณ„์‚ฐ
1084
+ const containerRect = container.getBoundingClientRect();
1085
+ const targetRect = targetPage.getBoundingClientRect();
1086
+ const scrollTop = container.scrollTop;
1087
+ const offset = targetRect.top - containerRect.top + scrollTop;
1088
+
1089
+ // ๋ถ€๋“œ๋Ÿฝ๊ฒŒ ์Šคํฌ๋กค
1090
+ container.scrollTo({{
1091
+ top: offset - 20,
1092
+ behavior: 'smooth'
1093
+ }});
1094
+ }}
1095
+ </script>
1096
+ """
1097
+ components.html(scroll_js, height=0)
1098
+ st.session_state.scroll_to_page = None
1099
 
1100
  with col2:
1101
+ st.markdown('<h3 class="chat-title">🔮 PLOBIN CHAT</h3>', unsafe_allow_html=True)
1102
 
1103
  # 채팅 히스토리를 담을 컨테이너
1104
  chat_container = st.container(height=650)
1105
 
1106
  with chat_container:
1107
+ for msg_idx, msg in enumerate(st.session_state.chat_history):
1108
  with st.chat_message(msg["role"]):
1109
  st.markdown(msg["content"])
1110
 
1111
  if msg["role"] == "assistant" and "sources" in msg:
1112
  with st.expander("๐Ÿ“š ์ฐธ์กฐ ๋ฌธ์„œ"):
1113
+ for idx, (doc, meta) in enumerate(zip(msg["sources"]["docs"], msg["sources"]["metas"])):
 
 
 
1114
  # ํ…์ŠคํŠธ๋ฅผ 150์ž๋กœ ์ œํ•œํ•˜๊ณ  ๊ฐ„๊ฒฐํ•˜๊ฒŒ ํ‘œ์‹œ
1115
  clean_text = doc[:150] + ('...' if len(doc) > 150 else '')
1116
 
1117
+ # ํŽ˜์ด์ง€ ๋ฒˆํ˜ธ ๋ฒ„ํŠผ (๋ฐ•์Šค์ฒ˜๋Ÿผ ๋ณด์ด๊ฒŒ) - msg_idx ์ถ”๊ฐ€๋กœ ๊ณ ์œ  ํ‚ค ์ƒ์„ฑ
1118
+ if st.button(
1119
+ f"๐Ÿ“„ ํŽ˜์ด์ง€ {meta['page']}",
1120
+ key=f"goto_source_msg{msg_idx}_{meta['page']}_{idx}",
1121
+ use_container_width=True,
1122
+ type="secondary"
1123
+ ):
1124
+ st.session_state.scroll_to_page = meta['page']
1125
+ st.rerun()
1126
+
1127
+ # ๋ฌธ์„œ ๋‚ด์šฉ ํ‘œ์‹œ
1128
  st.markdown(f"""
1129
+ <div style="background: #F1F5F9; padding: 0.8rem; border-radius: 0.5rem; margin-bottom: 1rem; border-left: 3px solid #667eea;">
1130
+ <div style="font-size: 0.9rem; color: #475569;">
 
 
 
1131
  {clean_text}
1132
  </div>
1133
  </div>
 
1142
  # ํ…์ŠคํŠธ๋ฅผ 150์ž๋กœ ์ œํ•œ
1143
  display_text = selected_text[:150] + ('...' if len(selected_text) > 150 else '')
1144
 
1145
+ # ํŽ˜์ด์ง€ ๋ฒˆํ˜ธ ๋ฒ„ํŠผ (ํ•˜์ด๋ผ์ดํŠธ ์Šคํƒ€์ผ) - msg_idx ์ถ”๊ฐ€๋กœ ๊ณ ์œ  ํ‚ค ์ƒ์„ฑ
1146
+ if st.button(
1147
+ f"โญ ํŽ˜์ด์ง€ {grok_data.get('page', '?')}",
1148
+ key=f"goto_grok_msg{msg_idx}_{grok_data.get('page', 0)}",
1149
+ use_container_width=True,
1150
+ type="primary"
1151
+ ):
1152
+ st.session_state.scroll_to_page = grok_data.get('page', 1)
1153
+ st.rerun()
1154
+
1155
+ # ์„ ํƒ๋œ ํ…์ŠคํŠธ ํ‘œ์‹œ
1156
  st.markdown(f"""
1157
+ <div style="background: #FEF08A; color: #854D0E; padding: 0.8rem; border-radius: 0.5rem; margin-top: 0.5rem; border-left: 4px solid #EAB308;">
1158
+ <div style="font-size: 0.9rem;">{display_text}</div>
 
1159
  </div>
1160
  """, unsafe_allow_html=True)
1161
 
 
1173
  query = st.session_state.processing_query
1174
  st.session_state.processing_query = None # ํ”Œ๋ž˜๊ทธ ๋ฆฌ์…‹
1175
 
1176
+ with st.spinner("๐Ÿ”ฎ PLOBIN์ด ๊ฒ€์ƒ‰์ค‘์ž…๋‹ˆ๋‹ค..."):
1177
  try:
1178
  # 1. ํ•˜์ด๋ธŒ๋ฆฌ๋“œ ๊ฒ€์ƒ‰ (๋ฒกํ„ฐ + ํ‚ค์›Œ๋“œ) - ์ƒ์œ„ 3๊ฐœ
1179
  search_results = hybrid_search(
 
1184
  )
1185
 
1186
  # 2. Grok API๋กœ ๊ฒ€์ฆ ๋ฐ ์ถ”์ถœ
1187
+ grok_result = grok_verify_and_extract(
1188
+ query,
1189
+ search_results,
1190
+ GROK_API_KEY
1191
+ )
 
1192
 
1193
  # 3. ๋‹ต๋ณ€ ์ƒ์„ฑ
1194
  answer = generate_answer(
 
1201
  highlights = extract_highlights_from_grok(grok_result)
1202
  st.session_state.current_highlights = highlights
1203
 
1204
+ # 5. Grok์ด ์„ ํƒํ•œ ํŽ˜์ด์ง€๋กœ ์ž๋™ ์Šคํฌ๋กค ์„ค์ •
1205
+ if grok_result and "page" in grok_result and "error" not in grok_result:
1206
+ st.session_state.scroll_to_page = grok_result["page"]
1207
+
1208
+ # 6. 채팅 히스토리에 답변 저장
1209
  chat_data = {
1210
  "role": "assistant",
1211
  "content": answer,