Update app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
"""
|
| 5 |
import streamlit as st
|
|
|
|
| 6 |
import fitz # PyMuPDF
|
| 7 |
import chromadb
|
| 8 |
from sentence_transformers import SentenceTransformer, util
|
|
@@ -17,6 +18,15 @@ import base64
|
|
| 17 |
from dotenv import load_dotenv
|
| 18 |
import json
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# 환경 변수 로드 (Load environment variables)
|
| 21 |
load_dotenv()
|
| 22 |
|
|
@@ -36,7 +46,7 @@ class HighlightConfig:
|
|
| 36 |
|
| 37 |
# Page config
|
| 38 |
st.set_page_config(
|
| 39 |
-
page_title="
|
| 40 |
page_icon="๐ฎ",
|
| 41 |
layout="wide",
|
| 42 |
initial_sidebar_state="expanded"
|
|
@@ -53,9 +63,35 @@ st.markdown("""
|
|
| 53 |
width: 290px !important;
|
| 54 |
}
|
| 55 |
|
|
|
|
| 56 |
[data-testid="stSidebar"] h1 {
|
| 57 |
color: white !important;
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
}
|
| 60 |
|
| 61 |
/* ํ์ผ ์
๋ก๋ ๋ฐฐ๊ฒฝ ํฌ๋ช
ํ๊ฒ */
|
|
@@ -144,11 +180,17 @@ st.markdown("""
|
|
| 144 |
}
|
| 145 |
|
| 146 |
/* ํค๋ ์คํ์ผ - ๋ฐ์ค ์ ๊ฑฐ, ํ
์คํธ ๊ทธ๋ฆผ์๋ง */
|
| 147 |
-
.
|
| 148 |
padding: 1.5rem 2rem;
|
| 149 |
margin-bottom: 2rem;
|
| 150 |
}
|
| 151 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
font-size: 2.5rem;
|
| 153 |
font-weight: bold;
|
| 154 |
color: white;
|
|
@@ -157,7 +199,7 @@ st.markdown("""
|
|
| 157 |
text-shadow: 2px 2px 8px rgba(0, 0, 0, 0.4),
|
| 158 |
0 0 20px rgba(102, 126, 234, 0.4);
|
| 159 |
}
|
| 160 |
-
.
|
| 161 |
font-size: 1rem;
|
| 162 |
color: rgba(255, 255, 255, 0.9);
|
| 163 |
text-align: center;
|
|
@@ -258,6 +300,7 @@ st.markdown("""
|
|
| 258 |
display: flex;
|
| 259 |
align-items: center;
|
| 260 |
margin: 1.5rem 0;
|
|
|
|
| 261 |
font-size: 1.1rem;
|
| 262 |
color: #2D3748;
|
| 263 |
}
|
|
@@ -284,6 +327,70 @@ st.markdown("""
|
|
| 284 |
align-items: center;
|
| 285 |
margin-bottom: 1rem;
|
| 286 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
</style>
|
| 288 |
""", unsafe_allow_html=True)
|
| 289 |
|
|
@@ -312,6 +419,8 @@ def init_session():
|
|
| 312 |
st.session_state.highlight_config = HighlightConfig()
|
| 313 |
if 'processing_query' not in st.session_state:
|
| 314 |
st.session_state.processing_query = None
|
|
|
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]:
|
|
@@ -787,7 +896,7 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
|
|
| 787 |
|
| 788 |
highlighted_pages = set(h['page'] for h in highlight_info)
|
| 789 |
|
| 790 |
-
pdf_html = '<div class="pdf-container">'
|
| 791 |
|
| 792 |
for page_num in range(len(doc)):
|
| 793 |
page = doc[page_num]
|
|
@@ -800,7 +909,9 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
|
|
| 800 |
# ์ค์ ์ด๋ฏธ์ง ํฌ๊ธฐ ๊ณ์ฐ (zoom_level์ ๋ฐ๋ผ)
|
| 801 |
zoom_percentage = int(zoom_level * 50) # 2.0 = 100%, 1.0 = 50%
|
| 802 |
|
| 803 |
-
|
|
|
|
|
|
|
| 804 |
|
| 805 |
# ํ์ด๋ผ์ดํธ ์ฌ๋ถ์ ๋ฐ๋ผ ํ์ด์ง ํค๋ ์คํ์ผ ๋ณ๊ฒฝ
|
| 806 |
if (page_num + 1) in highlighted_pages:
|
|
@@ -815,6 +926,7 @@ def render_pdf_with_highlights(pdf_bytes: bytes, highlight_info: List[Dict], zoo
|
|
| 815 |
pdf_html += '</div>'
|
| 816 |
|
| 817 |
pdf_html += '</div>'
|
|
|
|
| 818 |
doc.close()
|
| 819 |
|
| 820 |
return pdf_html
|
|
@@ -826,16 +938,17 @@ def main():
|
|
| 826 |
|
| 827 |
# Header ๋ฌธ์ ์ฒ๋ฆฌ ์ ์๋ง ๋ณด์
|
| 828 |
if not st.session_state.processed:
|
| 829 |
-
st.markdown("""
|
| 830 |
-
<div class="
|
| 831 |
-
<
|
| 832 |
-
<div class="
|
| 833 |
</div>
|
| 834 |
""", unsafe_allow_html=True)
|
| 835 |
|
| 836 |
# ========== ์ฌ์ด๋๋ฐ ==========
|
| 837 |
with st.sidebar:
|
| 838 |
-
st.
|
|
|
|
| 839 |
|
| 840 |
uploaded_file = st.file_uploader(
|
| 841 |
"๋๋๊ทธํ์ฌ ํ์ผ์ ์
๋ก๋ ๋๋ ํด๋ฆญํ์ฌ ์ ํํ์ธ์.",
|
|
@@ -860,7 +973,7 @@ def main():
|
|
| 860 |
try:
|
| 861 |
chunks, metadata_list, pdf_bytes, pages_text = extract_text_from_pdf(uploaded_file)
|
| 862 |
|
| 863 |
-
with st.spinner("
|
| 864 |
collection, embedder = create_vector_db(chunks, metadata_list)
|
| 865 |
|
| 866 |
st.session_state.vector_db = collection
|
|
@@ -889,19 +1002,19 @@ def main():
|
|
| 889 |
# ์ฒญํฌ ํ์ ์ ๊ฑฐ๋จ
|
| 890 |
# ============================================================
|
| 891 |
|
| 892 |
-
st.divider()
|
| 893 |
|
| 894 |
# ์ด๊ธฐํ ๋ฒํผ
|
| 895 |
-
if st.button("๐ ์ ๋ฌธ์ ์
๋ก๋", use_container_width=True):
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
|
| 906 |
# ===== ์์ง ๋ฌธ์๊ฐ ์ฒ๋ฆฌ๋์ง ์์ ๊ฒฝ์ฐ
|
| 907 |
if not st.session_state.processed:
|
|
@@ -910,19 +1023,19 @@ def main():
|
|
| 910 |
<h2 style="text-align: center; color: #2D3748; margin-bottom: 1.5rem;">๐ ์ฌ์ฉ ๋ฐฉ๋ฒ</h2>
|
| 911 |
<div class="guide-step">
|
| 912 |
<div class="step-number">1</div>
|
| 913 |
-
<div
|
| 914 |
</div>
|
| 915 |
<div class="guide-step">
|
| 916 |
<div class="step-number">2</div>
|
| 917 |
-
<div>๋ฌธ์ ์ฒ๋ฆฌ๊ฐ ์๋ฃ๋ ๋๊น์ง
|
| 918 |
</div>
|
| 919 |
<div class="guide-step">
|
| 920 |
<div class="step-number">3</div>
|
| 921 |
-
<div
|
| 922 |
</div>
|
| 923 |
<div class="guide-step">
|
| 924 |
<div class="step-number">4</div>
|
| 925 |
-
<div>AI๊ฐ
|
| 926 |
</div>
|
| 927 |
</div>
|
| 928 |
""", unsafe_allow_html=True)
|
|
@@ -957,33 +1070,64 @@ def main():
|
|
| 957 |
st.session_state.zoom_level
|
| 958 |
)
|
| 959 |
st.markdown(pdf_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 960 |
|
| 961 |
with col2:
|
| 962 |
-
st.markdown("
|
| 963 |
|
| 964 |
# ์ฑํ
ํ์คํ ๋ฆฌ๋ฅผ ๋ด์ ์ปจํ
์ด๋
|
| 965 |
chat_container = st.container(height=650)
|
| 966 |
|
| 967 |
with chat_container:
|
| 968 |
-
for msg in st.session_state.chat_history:
|
| 969 |
with st.chat_message(msg["role"]):
|
| 970 |
st.markdown(msg["content"])
|
| 971 |
|
| 972 |
if msg["role"] == "assistant" and "sources" in msg:
|
| 973 |
with st.expander("๐ ์ฐธ์กฐ ๋ฌธ์"):
|
| 974 |
-
for
|
| 975 |
-
msg["sources"]["docs"],
|
| 976 |
-
msg["sources"]["metas"]
|
| 977 |
-
), 1):
|
| 978 |
# ํ
์คํธ๋ฅผ 150์๋ก ์ ํํ๊ณ ๊ฐ๊ฒฐํ๊ฒ ํ์
|
| 979 |
clean_text = doc[:150] + ('...' if len(doc) > 150 else '')
|
| 980 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 981 |
st.markdown(f"""
|
| 982 |
-
<div
|
| 983 |
-
<div
|
| 984 |
-
<span class="page-indicator">ํ์ด์ง {meta['page']}</span>
|
| 985 |
-
</div>
|
| 986 |
-
<div style="font-size: 0.9rem; color: #475569; margin-top: 0.3rem;">
|
| 987 |
{clean_text}
|
| 988 |
</div>
|
| 989 |
</div>
|
|
@@ -998,10 +1142,20 @@ def main():
|
|
| 998 |
# ํ
์คํธ๋ฅผ 150์๋ก ์ ํ
|
| 999 |
display_text = selected_text[:150] + ('...' if len(selected_text) > 150 else '')
|
| 1000 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1001 |
st.markdown(f"""
|
| 1002 |
-
<div
|
| 1003 |
-
<
|
| 1004 |
-
<div style="margin-top: 0.5rem;">{display_text}</div>
|
| 1005 |
</div>
|
| 1006 |
""", unsafe_allow_html=True)
|
| 1007 |
|
|
@@ -1019,7 +1173,7 @@ def main():
|
|
| 1019 |
query = st.session_state.processing_query
|
| 1020 |
st.session_state.processing_query = None # ํ๋๊ทธ ๋ฆฌ์
|
| 1021 |
|
| 1022 |
-
with st.spinner("
|
| 1023 |
try:
|
| 1024 |
# 1. ํ์ด๋ธ๋ฆฌ๋ ๊ฒ์ (๋ฒกํฐ + ํค์๋) - ์์ 3๊ฐ
|
| 1025 |
search_results = hybrid_search(
|
|
@@ -1030,12 +1184,11 @@ def main():
|
|
| 1030 |
)
|
| 1031 |
|
| 1032 |
# 2. Grok API๋ก ๊ฒ์ฆ ๋ฐ ์ถ์ถ
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
)
|
| 1039 |
|
| 1040 |
# 3. ๋ต๋ณ ์์ฑ
|
| 1041 |
answer = generate_answer(
|
|
@@ -1048,7 +1201,11 @@ def main():
|
|
| 1048 |
highlights = extract_highlights_from_grok(grok_result)
|
| 1049 |
st.session_state.current_highlights = highlights
|
| 1050 |
|
| 1051 |
-
# 5.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1052 |
chat_data = {
|
| 1053 |
"role": "assistant",
|
| 1054 |
"content": answer,
|
|
|
|
| 1 |
"""
|
| 2 |
+
PLOBIN - ๋ฌธ์ ์ ๋ต์ ์ฐพ์์ฃผ๋ AI ๋น์
|
| 3 |
|
| 4 |
"""
|
| 5 |
import streamlit as st
|
| 6 |
+
import streamlit.components.v1 as components
|
| 7 |
import fitz # PyMuPDF
|
| 8 |
import chromadb
|
| 9 |
from sentence_transformers import SentenceTransformer, util
|
|
|
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
import json
|
| 20 |
|
| 21 |
+
import base64
|
| 22 |
+
|
| 23 |
+
def get_image_base64(image_path):
    """Return the contents of a file as a base64-encoded string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and returned as ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as img_file:
        # base64 output is pure ASCII, so an explicit ASCII decode yields the
        # same string as the default UTF-8 decode.
        return base64.b64encode(img_file.read()).decode("ascii")
|
| 26 |
+
|
| 27 |
+
# 파일 상단에서 한 번만 로드 (Load once at the top of the file)
|
| 28 |
+
plobin_logo_base64 = get_image_base64("img/plobin.png")
|
| 29 |
+
|
| 30 |
# 환경 변수 로드 (Load environment variables)
|
| 31 |
load_dotenv()
|
| 32 |
|
|
|
|
| 46 |
|
| 47 |
# Page config
|
| 48 |
st.set_page_config(
|
| 49 |
+
page_title="PLOBIN",
|
| 50 |
page_icon="๐ฎ",
|
| 51 |
layout="wide",
|
| 52 |
initial_sidebar_state="expanded"
|
|
|
|
| 63 |
width: 290px !important;
|
| 64 |
}
|
| 65 |
|
| 66 |
+
/* ์ฌ์ด๋๋ฐ ํ์ดํ ๋น๋๋ ํจ๊ณผ */
|
| 67 |
[data-testid="stSidebar"] h1 {
|
| 68 |
color: white !important;
|
| 69 |
+
font-weight: 900 !important;
|
| 70 |
+
text-shadow:
|
| 71 |
+
0 0 30px rgba(255,255,255,0.6),
|
| 72 |
+
0 0 50px rgba(102,126,234,0.4),
|
| 73 |
+
3px 3px 40px rgba(0,0,0,0.4);
|
| 74 |
+
animation: sidebarTitlePulse 4s ease-in-out infinite;
|
| 75 |
+
letter-spacing: 2px;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
/* ์ฌ์ด๋๋ฐ ํ์ดํ ์ ๋๋ฉ์ด์
*/
|
| 79 |
+
@keyframes sidebarTitlePulse {
|
| 80 |
+
0%, 100% {
|
| 81 |
+
transform: scale(1);
|
| 82 |
+
text-shadow:
|
| 83 |
+
0 0 30px rgba(255,255,255,0.6),
|
| 84 |
+
0 0 50px rgba(102,126,234,0.4),
|
| 85 |
+
3px 3px 40px rgba(0,0,0,0.4);
|
| 86 |
+
}
|
| 87 |
+
50% {
|
| 88 |
+
transform: scale(1.03);
|
| 89 |
+
text-shadow:
|
| 90 |
+
0 0 40px rgba(255,255,255,0.8),
|
| 91 |
+
0 0 70px rgba(102,126,234,0.6),
|
| 92 |
+
0 0 100px rgba(118,75,162,0.4),
|
| 93 |
+
3px 3px 40px rgba(0,0,0,0.4);
|
| 94 |
+
}
|
| 95 |
}
|
| 96 |
|
| 97 |
/* ํ์ผ ์
๋ก๋ ๋ฐฐ๊ฒฝ ํฌ๋ช
ํ๊ฒ */
|
|
|
|
| 180 |
}
|
| 181 |
|
| 182 |
/* ํค๋ ์คํ์ผ - ๋ฐ์ค ์ ๊ฑฐ, ํ
์คํธ ๊ทธ๋ฆผ์๋ง */
|
| 183 |
+
.plobin-header {
|
| 184 |
padding: 1.5rem 2rem;
|
| 185 |
margin-bottom: 2rem;
|
| 186 |
}
|
| 187 |
+
.plobin-logo {
|
| 188 |
+
display: block;
|
| 189 |
+
margin: 0 auto;
|
| 190 |
+
height: 60px; /* ์ํ๋ ํฌ๊ธฐ๋ก ์กฐ์ */
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
.plobin-title {
|
| 194 |
font-size: 2.5rem;
|
| 195 |
font-weight: bold;
|
| 196 |
color: white;
|
|
|
|
| 199 |
text-shadow: 2px 2px 8px rgba(0, 0, 0, 0.4),
|
| 200 |
0 0 20px rgba(102, 126, 234, 0.4);
|
| 201 |
}
|
| 202 |
+
.plobin-subtitle {
|
| 203 |
font-size: 1rem;
|
| 204 |
color: rgba(255, 255, 255, 0.9);
|
| 205 |
text-align: center;
|
|
|
|
| 300 |
display: flex;
|
| 301 |
align-items: center;
|
| 302 |
margin: 1.5rem 0;
|
| 303 |
+
margin-left: 3.5rem;
|
| 304 |
font-size: 1.1rem;
|
| 305 |
color: #2D3748;
|
| 306 |
}
|
|
|
|
| 327 |
align-items: center;
|
| 328 |
margin-bottom: 1rem;
|
| 329 |
}
|
| 330 |
+
|
| 331 |
+
/* ํ์ด์ง ์ ํ ์ ๋๋ฉ์ด์
*/
|
| 332 |
+
@keyframes pulse {
|
| 333 |
+
0%, 100% {
|
| 334 |
+
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
|
| 335 |
+
}
|
| 336 |
+
50% {
|
| 337 |
+
box-shadow: 0 0 20px 10px rgba(16, 185, 129, 0);
|
| 338 |
+
}
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
/* ์ฑํ
ํ์ดํ ์คํ์ผ (์ ๋๋ฉ์ด์
์ ๊ฑฐ) */
|
| 342 |
+
.chat-title {
|
| 343 |
+
color: white !important;
|
| 344 |
+
font-weight: 900 !important;
|
| 345 |
+
font-size: 1.75rem !important;
|
| 346 |
+
margin-bottom: 1rem !important;
|
| 347 |
+
text-shadow:
|
| 348 |
+
0 0 30px rgba(255,255,255,0.6),
|
| 349 |
+
0 0 50px rgba(102,126,234,0.4),
|
| 350 |
+
3px 3px 40px rgba(0,0,0,0.4);
|
| 351 |
+
letter-spacing: 2px;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
/* ์ฑํ
๋ด ํ์ด์ง ๋ฒํธ ๋ฒํผ ์คํ์ผ - ํด๋ฆญ ๊ฐ๋ฅํ ๋ฐ์ค์ฒ๋ผ */
|
| 355 |
+
[data-testid="column"] button[kind="secondary"] {
|
| 356 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 357 |
+
color: white !important;
|
| 358 |
+
border: none !important;
|
| 359 |
+
border-radius: 0.5rem !important;
|
| 360 |
+
padding: 0.6rem 1rem !important;
|
| 361 |
+
font-weight: bold !important;
|
| 362 |
+
font-size: 0.95rem !important;
|
| 363 |
+
text-align: left !important;
|
| 364 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
|
| 365 |
+
transition: all 0.2s ease !important;
|
| 366 |
+
cursor: pointer !important;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
[data-testid="column"] button[kind="secondary"]:hover {
|
| 370 |
+
transform: translateY(-2px) !important;
|
| 371 |
+
box-shadow: 0 4px 8px rgba(102, 126, 234, 0.3) !important;
|
| 372 |
+
background: linear-gradient(135deg, #7c8ff5 0%, #8a5db8 100%) !important;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
[data-testid="column"] button[kind="primary"] {
|
| 376 |
+
background: linear-gradient(135deg, #FEF08A 0%, #FDE047 100%) !important;
|
| 377 |
+
color: #854D0E !important;
|
| 378 |
+
border: 2px solid #EAB308 !important;
|
| 379 |
+
border-radius: 0.5rem !important;
|
| 380 |
+
padding: 0.6rem 1rem !important;
|
| 381 |
+
font-weight: bold !important;
|
| 382 |
+
font-size: 0.95rem !important;
|
| 383 |
+
text-align: left !important;
|
| 384 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
|
| 385 |
+
transition: all 0.2s ease !important;
|
| 386 |
+
cursor: pointer !important;
|
| 387 |
+
}
|
| 388 |
+
|
| 389 |
+
[data-testid="column"] button[kind="primary"]:hover {
|
| 390 |
+
transform: translateY(-2px) !important;
|
| 391 |
+
box-shadow: 0 4px 8px rgba(234, 179, 8, 0.3) !important;
|
| 392 |
+
background: linear-gradient(135deg, #FDE047 0%, #FACC15 100%) !important;
|
| 393 |
+
}
|
| 394 |
</style>
|
| 395 |
""", unsafe_allow_html=True)
|
| 396 |
|
|
|
|
| 419 |
st.session_state.highlight_config = HighlightConfig()
|
| 420 |
if 'processing_query' not in st.session_state:
|
| 421 |
st.session_state.processing_query = None
|
| 422 |
+
if 'scroll_to_page' not in st.session_state:
|
| 423 |
+
st.session_state.scroll_to_page = None
|
| 424 |
|
| 425 |
|
| 426 |
def extract_text_from_pdf(pdf_file) -> Tuple[List[str], List[Dict], bytes, Dict]:
|
|
|
|
| 896 |
|
| 897 |
highlighted_pages = set(h['page'] for h in highlight_info)
|
| 898 |
|
| 899 |
+
pdf_html = '<div class="pdf-container" id="pdf-viewer-container">'
|
| 900 |
|
| 901 |
for page_num in range(len(doc)):
|
| 902 |
page = doc[page_num]
|
|
|
|
| 909 |
# ์ค์ ์ด๋ฏธ์ง ํฌ๊ธฐ ๊ณ์ฐ (zoom_level์ ๋ฐ๋ผ)
|
| 910 |
zoom_percentage = int(zoom_level * 50) # 2.0 = 100%, 1.0 = 50%
|
| 911 |
|
| 912 |
+
# ๊ฐ ํ์ด์ง์ ๊ณ ์ ID ๋ถ์ฌ
|
| 913 |
+
page_id = f'page-{page_num + 1}'
|
| 914 |
+
pdf_html += f'<div id="{page_id}" style="margin-bottom: 2rem; position: relative;">'
|
| 915 |
|
| 916 |
# ํ์ด๋ผ์ดํธ ์ฌ๋ถ์ ๋ฐ๋ผ ํ์ด์ง ํค๋ ์คํ์ผ ๋ณ๊ฒฝ
|
| 917 |
if (page_num + 1) in highlighted_pages:
|
|
|
|
| 926 |
pdf_html += '</div>'
|
| 927 |
|
| 928 |
pdf_html += '</div>'
|
| 929 |
+
|
| 930 |
doc.close()
|
| 931 |
|
| 932 |
return pdf_html
|
|
|
|
| 938 |
|
| 939 |
# Header ๋ฌธ์ ์ฒ๋ฆฌ ์ ์๋ง ๋ณด์
|
| 940 |
if not st.session_state.processed:
|
| 941 |
+
st.markdown(f"""
|
| 942 |
+
<div class="plobin-header">
|
| 943 |
+
<img src="data:image/png;base64,{plobin_logo_base64}" class="plobin-logo" alt="PLOBIN" style="height: 60px; margin-bottom: 10px;">
|
| 944 |
+
<div class="plobin-subtitle">๋ฌธ์ ์ ๋ต์ ์ฐพ์์ฃผ๋ AI ๋น์</div>
|
| 945 |
</div>
|
| 946 |
""", unsafe_allow_html=True)
|
| 947 |
|
| 948 |
# ========== ์ฌ์ด๋๋ฐ ==========
|
| 949 |
with st.sidebar:
|
| 950 |
+
st.image("img/plobin.png", width=120) # ํฝ์
๊ฐ์ผ๋ก ์ง์ ์ง์
|
| 951 |
+
# st.title("๐ฎ PLOBIN")
|
| 952 |
|
| 953 |
uploaded_file = st.file_uploader(
|
| 954 |
"๋๋๊ทธํ์ฌ ํ์ผ์ ์
๋ก๋ ๋๋ ํด๋ฆญํ์ฌ ์ ํํ์ธ์.",
|
|
|
|
| 973 |
try:
|
| 974 |
chunks, metadata_list, pdf_bytes, pages_text = extract_text_from_pdf(uploaded_file)
|
| 975 |
|
| 976 |
+
with st.spinner("๐ค ๋ฌธ์๋ฅผ AI๊ฐ ์ดํดํ ์ ์๊ฒ ์ฒ๋ฆฌ ์ค.."):
|
| 977 |
collection, embedder = create_vector_db(chunks, metadata_list)
|
| 978 |
|
| 979 |
st.session_state.vector_db = collection
|
|
|
|
| 1002 |
# ์ฒญํฌ ํ์ ์ ๊ฑฐ๋จ
|
| 1003 |
# ============================================================
|
| 1004 |
|
| 1005 |
+
# st.divider()
|
| 1006 |
|
| 1007 |
# ์ด๊ธฐํ ๋ฒํผ
|
| 1008 |
+
# if st.button("๐ ์ ๋ฌธ์ ์
๋ก๋", use_container_width=True):
|
| 1009 |
+
# st.session_state.processed = False
|
| 1010 |
+
# st.session_state.vector_db = None
|
| 1011 |
+
# st.session_state.embedder = None
|
| 1012 |
+
# st.session_state.chat_history = []
|
| 1013 |
+
# st.session_state.current_highlights = []
|
| 1014 |
+
# st.session_state.pdf_bytes = None
|
| 1015 |
+
# st.session_state.pdf_pages_text = {}
|
| 1016 |
+
# st.session_state.zoom_level = 2.0
|
| 1017 |
+
# st.rerun()
|
| 1018 |
|
| 1019 |
# ===== ์์ง ๋ฌธ์๊ฐ ์ฒ๋ฆฌ๋์ง ์์ ๊ฒฝ์ฐ
|
| 1020 |
if not st.session_state.processed:
|
|
|
|
| 1023 |
<h2 style="text-align: center; color: #2D3748; margin-bottom: 1.5rem;">๐ ์ฌ์ฉ ๋ฐฉ๋ฒ</h2>
|
| 1024 |
<div class="guide-step">
|
| 1025 |
<div class="step-number">1</div>
|
| 1026 |
+
<div>๐ค PDF ํ์ผ์ ์ฌ๋ ค์ฃผ์ธ์</div>
|
| 1027 |
</div>
|
| 1028 |
<div class="guide-step">
|
| 1029 |
<div class="step-number">2</div>
|
| 1030 |
+
<div>๋ฌธ์ ์ฒ๋ฆฌ๊ฐ ์๋ฃ๋ ๋๊น์ง ์ ์๋ง ๊ธฐ๋ค๋ ค์ฃผ์ธ์</div>
|
| 1031 |
</div>
|
| 1032 |
<div class="guide-step">
|
| 1033 |
<div class="step-number">3</div>
|
| 1034 |
+
<div>๋ฌธ์ ๋ด ๊ถ๊ธํ ๋ด์ฉ์ ๋ฌผ์ด๋ณด์ธ์</div>
|
| 1035 |
</div>
|
| 1036 |
<div class="guide-step">
|
| 1037 |
<div class="step-number">4</div>
|
| 1038 |
+
<div>AI๊ฐ ์ ํํ ๋ต๋ณ๊ณผ ์ถ์ฒ๋ฅผ ํจ๊ป ์๋ ค๋๋ ค์</div>
|
| 1039 |
</div>
|
| 1040 |
</div>
|
| 1041 |
""", unsafe_allow_html=True)
|
|
|
|
| 1070 |
st.session_state.zoom_level
|
| 1071 |
)
|
| 1072 |
st.markdown(pdf_html, unsafe_allow_html=True)
|
| 1073 |
+
|
| 1074 |
+
# ์คํฌ๋กค ๊ธฐ๋ฅ - JavaScript๋ก ๊ตฌํ
|
| 1075 |
+
if st.session_state.scroll_to_page:
|
| 1076 |
+
scroll_js = f"""
|
| 1077 |
+
<script>
|
| 1078 |
+
// PDF ์ปจํ
์ด๋ ์ฐพ๊ธฐ
|
| 1079 |
+
const container = parent.document.querySelector('.pdf-container');
|
| 1080 |
+
const targetPage = parent.document.getElementById('page-{st.session_state.scroll_to_page}');
|
| 1081 |
+
|
| 1082 |
+
if (container && targetPage) {{
|
| 1083 |
+
// ์ปจํ
์ด๋ ๋ด์์ ํ๊ฒ ํ์ด์ง์ ์์น ๊ณ์ฐ
|
| 1084 |
+
const containerRect = container.getBoundingClientRect();
|
| 1085 |
+
const targetRect = targetPage.getBoundingClientRect();
|
| 1086 |
+
const scrollTop = container.scrollTop;
|
| 1087 |
+
const offset = targetRect.top - containerRect.top + scrollTop;
|
| 1088 |
+
|
| 1089 |
+
// ๋ถ๋๋ฝ๊ฒ ์คํฌ๋กค
|
| 1090 |
+
container.scrollTo({{
|
| 1091 |
+
top: offset - 20,
|
| 1092 |
+
behavior: 'smooth'
|
| 1093 |
+
}});
|
| 1094 |
+
}}
|
| 1095 |
+
</script>
|
| 1096 |
+
"""
|
| 1097 |
+
components.html(scroll_js, height=0)
|
| 1098 |
+
st.session_state.scroll_to_page = None
|
| 1099 |
|
| 1100 |
with col2:
|
| 1101 |
+
st.markdown('<h3 class="chat-title">๐ฎ PLOBIN CHAT</h3>', unsafe_allow_html=True)
|
| 1102 |
|
| 1103 |
# ์ฑํ
ํ์คํ ๋ฆฌ๋ฅผ ๋ด์ ์ปจํ
์ด๋
|
| 1104 |
chat_container = st.container(height=650)
|
| 1105 |
|
| 1106 |
with chat_container:
|
| 1107 |
+
for msg_idx, msg in enumerate(st.session_state.chat_history):
|
| 1108 |
with st.chat_message(msg["role"]):
|
| 1109 |
st.markdown(msg["content"])
|
| 1110 |
|
| 1111 |
if msg["role"] == "assistant" and "sources" in msg:
|
| 1112 |
with st.expander("๐ ์ฐธ์กฐ ๋ฌธ์"):
|
| 1113 |
+
for idx, (doc, meta) in enumerate(zip(msg["sources"]["docs"], msg["sources"]["metas"])):
|
|
|
|
|
|
|
|
|
|
| 1114 |
# ํ
์คํธ๋ฅผ 150์๋ก ์ ํํ๊ณ ๊ฐ๊ฒฐํ๊ฒ ํ์
|
| 1115 |
clean_text = doc[:150] + ('...' if len(doc) > 150 else '')
|
| 1116 |
|
| 1117 |
+
# ํ์ด์ง ๋ฒํธ ๋ฒํผ (๋ฐ์ค์ฒ๋ผ ๋ณด์ด๊ฒ) - msg_idx ์ถ๊ฐ๋ก ๊ณ ์ ํค ์์ฑ
|
| 1118 |
+
if st.button(
|
| 1119 |
+
f"๐ ํ์ด์ง {meta['page']}",
|
| 1120 |
+
key=f"goto_source_msg{msg_idx}_{meta['page']}_{idx}",
|
| 1121 |
+
use_container_width=True,
|
| 1122 |
+
type="secondary"
|
| 1123 |
+
):
|
| 1124 |
+
st.session_state.scroll_to_page = meta['page']
|
| 1125 |
+
st.rerun()
|
| 1126 |
+
|
| 1127 |
+
# ๋ฌธ์ ๋ด์ฉ ํ์
|
| 1128 |
st.markdown(f"""
|
| 1129 |
+
<div style="background: #F1F5F9; padding: 0.8rem; border-radius: 0.5rem; margin-bottom: 1rem; border-left: 3px solid #667eea;">
|
| 1130 |
+
<div style="font-size: 0.9rem; color: #475569;">
|
|
|
|
|
|
|
|
|
|
| 1131 |
{clean_text}
|
| 1132 |
</div>
|
| 1133 |
</div>
|
|
|
|
| 1142 |
# ํ
์คํธ๋ฅผ 150์๋ก ์ ํ
|
| 1143 |
display_text = selected_text[:150] + ('...' if len(selected_text) > 150 else '')
|
| 1144 |
|
| 1145 |
+
# ํ์ด์ง ๋ฒํธ ๋ฒํผ (ํ์ด๋ผ์ดํธ ์คํ์ผ) - msg_idx ์ถ๊ฐ๋ก ๊ณ ์ ํค ์์ฑ
|
| 1146 |
+
if st.button(
|
| 1147 |
+
f"โญ ํ์ด์ง {grok_data.get('page', '?')}",
|
| 1148 |
+
key=f"goto_grok_msg{msg_idx}_{grok_data.get('page', 0)}",
|
| 1149 |
+
use_container_width=True,
|
| 1150 |
+
type="primary"
|
| 1151 |
+
):
|
| 1152 |
+
st.session_state.scroll_to_page = grok_data.get('page', 1)
|
| 1153 |
+
st.rerun()
|
| 1154 |
+
|
| 1155 |
+
# ์ ํ๋ ํ
์คํธ ํ์
|
| 1156 |
st.markdown(f"""
|
| 1157 |
+
<div style="background: #FEF08A; color: #854D0E; padding: 0.8rem; border-radius: 0.5rem; margin-top: 0.5rem; border-left: 4px solid #EAB308;">
|
| 1158 |
+
<div style="font-size: 0.9rem;">{display_text}</div>
|
|
|
|
| 1159 |
</div>
|
| 1160 |
""", unsafe_allow_html=True)
|
| 1161 |
|
|
|
|
| 1173 |
query = st.session_state.processing_query
|
| 1174 |
st.session_state.processing_query = None # ํ๋๊ทธ ๋ฆฌ์
|
| 1175 |
|
| 1176 |
+
with st.spinner("๐ฎ PLOBIN์ด ๊ฒ์์ค์
๋๋ค..."):
|
| 1177 |
try:
|
| 1178 |
# 1. ํ์ด๋ธ๋ฆฌ๋ ๊ฒ์ (๋ฒกํฐ + ํค์๋) - ์์ 3๊ฐ
|
| 1179 |
search_results = hybrid_search(
|
|
|
|
| 1184 |
)
|
| 1185 |
|
| 1186 |
# 2. Grok API๋ก ๊ฒ์ฆ ๋ฐ ์ถ์ถ
|
| 1187 |
+
grok_result = grok_verify_and_extract(
|
| 1188 |
+
query,
|
| 1189 |
+
search_results,
|
| 1190 |
+
GROK_API_KEY
|
| 1191 |
+
)
|
|
|
|
| 1192 |
|
| 1193 |
# 3. ๋ต๋ณ ์์ฑ
|
| 1194 |
answer = generate_answer(
|
|
|
|
| 1201 |
highlights = extract_highlights_from_grok(grok_result)
|
| 1202 |
st.session_state.current_highlights = highlights
|
| 1203 |
|
| 1204 |
+
# 5. Grok์ด ์ ํํ ํ์ด์ง๋ก ์๋ ์คํฌ๋กค ์ค์
|
| 1205 |
+
if grok_result and "page" in grok_result and "error" not in grok_result:
|
| 1206 |
+
st.session_state.scroll_to_page = grok_result["page"]
|
| 1207 |
+
|
| 1208 |
+
# 6. ์ฑํ
ํ์คํ ๋ฆฌ์ ๋ต๋ณ ์ ์ฅ
|
| 1209 |
chat_data = {
|
| 1210 |
"role": "assistant",
|
| 1211 |
"content": answer,
|