# streamlit / src/app.py
# Uploaded by lhss0520 (profile picture caption from the hosting page)
# Commit message: Upload src/app.py with huggingface_hub
# Commit: 2f6c4ad (verified)
import os
import re
import csv
import uuid
import html
import json
import fitz
from pathlib import Path
from datetime import datetime
from typing import TypedDict, Literal
import streamlit as st
import streamlit.components.v1 as components
from openai import OpenAI
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from langgraph.graph import StateGraph, END
# =========================================================
# 0. Basic settings
# =========================================================
# Page-level Streamlit configuration: browser title, icon and wide layout.
st.set_page_config(
    page_title="AIVLE ν•™μŠ΅λ„μš°λ―Έ",
    page_icon="πŸ€–",
    layout="wide"
)
# Directory containing this script; bundled assets are resolved relative to it.
BASE_DIR = Path(__file__).resolve().parent
# Only the single integrated white-paper PDF is used.
# The file name may differ, so the first candidate below that exists is chosen;
# when none exists the first candidate is kept so later code can report the path.
PDF_CANDIDATES = [
    BASE_DIR / "AIVLE_School_λ°±μ„œ_톡합본.pdf",
]
PDF_PATH = next((path for path in PDF_CANDIDATES if path.exists()), PDF_CANDIDATES[0])
UPLOAD_DIR = BASE_DIR / "uploads"
TEMP_DIR = BASE_DIR / "temp"
IMAGE_BANNER_PATH = BASE_DIR / "images" / "이미지.png"
LOGO_PATH = BASE_DIR / "images" / "μ—μ΄λΈ”ν•™μŠ΅λ„μš°λ―Έ 둜고.png"
# Create the working directories; exist_ok keeps script reruns from failing.
UPLOAD_DIR.mkdir(exist_ok=True)
TEMP_DIR.mkdir(exist_ok=True)
# Shared OpenAI client, used further down for speech synthesis and transcription.
client = OpenAI()
# =========================================================
# 1. CSS: sidebar/login UI (from code #1) combined with chat UI (from code #2)
# =========================================================
# Injects one <style> block for the whole page. The class names defined here
# (hero-box, login-lock-box, account-card, user-bubble, assistant-card, ...)
# are the ones referenced by the raw HTML snippets rendered later in this file.
st.markdown("""
<style>
.main .block-container {
padding-top: 2rem;
max-width: 1100px;
}
.hero-box {
background: #d9f3f2;
border-radius: 22px;
padding: 34px 38px;
margin-bottom: 28px;
display: flex;
justify-content: space-between;
align-items: center;
}
.hero-title {
font-size: 30px;
font-weight: 800;
color: #111827;
margin-bottom: 10px;
}
.hero-sub {
font-size: 15px;
color: #6b7280;
}
.robot {
font-size: 72px;
}
.section-title {
font-size: 17px;
font-weight: 800;
margin: 20px 0 12px 0;
}
.login-lock-box {
background: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 18px;
padding: 36px;
text-align: center;
margin-top: 80px;
}
.login-lock-title {
font-size: 26px;
font-weight: 800;
margin-bottom: 10px;
}
.login-lock-sub {
color: #6b7280;
font-size: 15px;
}
section[data-testid="stSidebar"] {
background-color: #f8fafc;
}
.sidebar-title {
font-size: 22px;
font-weight: 900;
color: #111827;
margin-bottom: 4px;
}
.sidebar-subtitle {
font-size: 13px;
color: #6b7280;
line-height: 1.5;
margin-bottom: 18px;
}
.account-card {
background: #ecfdf5;
border: 1px solid #d1fae5;
border-radius: 16px;
padding: 14px 16px;
margin-bottom: 12px;
}
.account-name {
font-size: 15px;
font-weight: 800;
color: #065f46;
}
.account-status {
font-size: 12px;
color: #059669;
margin-top: 4px;
}
.sidebar-section-title {
font-size: 13px;
font-weight: 800;
color: #374151;
margin-top: 22px;
margin-bottom: 8px;
}
.history-caption {
font-size: 11px;
color: #9ca3af;
margin-top: -6px;
margin-bottom: 8px;
}
.sidebar-help {
font-size: 12px;
color: #9ca3af;
line-height: 1.5;
}
.chat-wrap {
border-top: 1px solid #e5e7eb;
margin-top: 28px;
padding-top: 26px;
}
.user-bubble {
background: #d9f3f2;
padding: 14px 18px;
border-radius: 18px;
width: fit-content;
max-width: 70%;
margin-left: auto;
margin-bottom: 14px;
font-weight: 500;
line-height: 1.6;
word-break: break-word;
}
.assistant-card {
background: white;
border: 1px solid #e5e7eb;
border-radius: 18px;
padding: 20px;
margin-top: 8px;
margin-bottom: 8px;
max-width: 780px;
box-shadow: 0 2px 10px rgba(0,0,0,0.04);
line-height: 1.7;
word-break: break-word;
}
.assistant-name {
font-size: 14px;
color: #6b7280;
font-weight: 700;
margin-bottom: 8px;
}
.message-tools {
display: flex;
gap: 8px;
margin-bottom: 18px;
}
</style>
""", unsafe_allow_html=True)
# =========================================================
# 2. Session state initialization
# =========================================================
def init_session_state():
    """Seed ``st.session_state`` with every key the app reads.

    Keys that already exist are left untouched, so values survive reruns.
    """
    # Built on each call so the mutable defaults (dict/list) are never shared.
    defaults = {
        "logged_in": False,
        "user_id": "",
        "user_name": "",
        "page": "home",
        "chats": {},
        "current_chat_id": None,
        "recommended_questions": [
            "μΆœμ„ 인정 μš”μ²­μ€ μ–΄λ–»κ²Œ ν•˜λ‚˜μš”?",
            "κ°•μ˜ λ‹€μ‹œλ³΄κΈ°λ₯Ό ν•  수 μžˆλ‚˜μš”?",
            "ν›ˆλ ¨μž₯λ €κΈˆμ€ μ–Έμ œ μ§€κΈ‰λ˜λ‚˜μš”?",
            "개인 포트폴리였둜 ν™œμš©ν•  수 μžˆλŠ” λ²”μœ„κ°€ μ–΄λ–»κ²Œ λ˜λ‚˜μš”?"
        ],
        "rag_upload_signature": None,
        "retriever": None,
        "rag_chain": None,
        "format_docs": None,
    }
    for state_key, initial_value in defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value


init_session_state()
# =========================================================
# 3. Chat session management functions (based on code #1)
# =========================================================
def create_new_chat():
    """Register an empty chat, make it the current chat and show the home page."""
    new_chat_id = str(uuid.uuid4())
    new_chat = {
        "title": "μƒˆ λŒ€ν™”",
        "created_at": datetime.now().strftime("%Y-%m-%d %H:%M"),
        "messages": []
    }
    session = st.session_state
    session.chats[new_chat_id] = new_chat
    session.current_chat_id = new_chat_id
    session.page = "home"
def get_current_messages():
    """Return the message list of the current chat, creating a chat first if none is selected."""
    session = st.session_state
    if session.current_chat_id is None:
        create_new_chat()
    active_chat = session.chats[session.current_chat_id]
    return active_chat["messages"]
def update_chat_title(question):
    """Name the current chat after ``question`` if it still has the placeholder title.

    The title is the stripped question, cut to 24 characters plus "..." when longer.
    Does nothing when no chat is selected or the chat was already titled.
    """
    active_id = st.session_state.current_chat_id
    if active_id is None:
        return
    chat = st.session_state.chats[active_id]
    if chat["title"] != "μƒˆ λŒ€ν™”":
        return
    stripped = question.strip()
    chat["title"] = stripped[:24] + "..." if len(stripped) > 24 else stripped
def delete_chat(chat_id):
    """Remove a chat from the session and pick a replacement current chat if needed.

    NOTE(review): the nesting below was reconstructed from a copy of the file
    whose indentation was lost - confirm against the original.
    """
    if chat_id in st.session_state.chats:
        del st.session_state.chats[chat_id]
    # When the deleted chat was the one being shown, fall back to the most
    # recently inserted remaining chat, or start a fresh one if none is left.
    if st.session_state.current_chat_id == chat_id:
        if st.session_state.chats:
            latest_chat_id = list(st.session_state.chats.keys())[-1]
            st.session_state.current_chat_id = latest_chat_id
        else:
            create_new_chat()
    st.session_state.page = "home"
# =========================================================
# 4. Document loading functions (based on code #1, with UPLOAD_DIR added)
# =========================================================
def safe_filename(filename):
    """Return ``filename`` reduced to characters that are safe in a file name.

    Every character other than Hangul syllables, ASCII letters, digits,
    ``_``, ``.`` and ``-`` is replaced by ``_`` (this includes path separators).
    Names that end up empty or consisting only of dots (``""``, ``"."``,
    ``".."``) are replaced by underscores, because joining them to the upload
    directory would point at a directory instead of a file.
    """
    # The Hangul syllable block is written with \u escapes so the character
    # range stays valid no matter how this source file is encoded or decoded.
    cleaned = re.sub(r"[^\uAC00-\uD7A3a-zA-Z0-9_.-]", "_", filename)
    if not cleaned.strip("."):
        return "_" * max(len(cleaned), 1)
    return cleaned
def load_txt_file(file_path):
    """Read a text file (UTF-8, falling back to CP949) into a single-element Document list.

    The document's ``source`` metadata is the file path as a string.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        content = file_path.read_text(encoding="cp949")
    document = Document(page_content=content, metadata={"source": str(file_path)})
    return [document]
def load_csv_file(file_path):
    """Read a CSV file into a single-element Document list.

    Each CSV row becomes one text line with its cells joined by " | ".
    The file is decoded as UTF-8; if that fails, it is re-read as CP949.

    Returns:
        A list holding one Document whose ``source`` metadata is the path string.
    """
    def read_rows(encoding):
        # Decoding errors surface while reading, not in open(), so the whole
        # read must sit inside the try/except below for the fallback to work.
        # newline="" is what the csv module expects for correct handling of
        # line breaks embedded in quoted fields.
        with open(file_path, "r", encoding=encoding, newline="") as f:
            return [" | ".join(row) for row in csv.reader(f)]

    try:
        rows = read_rows("utf-8")
    except UnicodeDecodeError:
        rows = read_rows("cp949")
    text = "\n".join(rows)
    return [
        Document(
            page_content=text,
            metadata={"source": str(file_path)}
        )
    ]
def load_pdf_file(file_path):
    """Load a PDF through PyMuPDFLoader and return the documents it produces."""
    return PyMuPDFLoader(str(file_path)).load()
def load_faq_from_pdf(pdf_path):
    """
    Read the chapter-4 FAQ tables inside the integrated white-paper PDF and
    convert them to a list of ``{"category", "question", "answer"}`` dicts.

    No separate FAQ PDF is used. The FAQ is spread over several tables on
    consecutive pages; pages where the table header is not repeated and
    answers that continue on the next page/row are handled as well.

    Returns an empty list (after showing a Streamlit warning) when the file is
    missing or extraction fails. Duplicate questions are reported only once.
    """
    if not pdf_path.exists():
        st.warning(f"톡합 λ°±μ„œ PDF νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {pdf_path}")
        return []
    faq_data = []
    # Only categories that really occur in this PDF's FAQ tables are accepted.
    # This keeps other tables (e.g. project lists) from being read as FAQ rows.
    valid_categories = {
        "λͺ¨μ§‘/μ„ λ°œ",
        "μΆœμ„",
        "κ°•μ˜",
        "KDT",
        "AIVLE-EDU",
        "ꡐ윑μž₯",
        "기타",
        "μ½”λ”© ν•™μŠ΅",
        "λ…ΈνŠΈλΆ",
        "κ΅­λ―Όμ·¨μ—…μ œλ„",
        "취업지원/μ±„μš©μ—°κ³„",
        "ν•™μŠ΅",
        "ν”„λ‘œμ νŠΈ",
    }

    def clean_text(text):
        """Collapse all whitespace runs to single spaces; None becomes ""."""
        if text is None:
            return ""
        text = str(text).replace("\n", " ")
        text = re.sub(r"\s+", " ", text)
        return text.strip()

    def normalize_category(text):
        """Clean a category cell and undo splits caused by line breaks in the table."""
        category = clean_text(text)
        category = category.replace("κ΅­λ―Όμ·¨μ—…μ œ 도", "κ΅­λ―Όμ·¨μ—…μ œλ„")
        category = category.replace("취업지원/ μ±„μš©μ—°κ³„", "취업지원/μ±„μš©μ—°κ³„")
        return category

    try:
        doc = fitz.open(str(pdf_path))
        # try/finally guarantees the document handle is released even when
        # table extraction raises part-way through the file.
        try:
            in_faq_section = False
            last_item = None
            for page in doc:
                page_text = page.get_text("text")
                # The table of contents also mentions chapter 4, so the start
                # is detected from the FAQ intro sentence or from a real body
                # page (not the contents page, and past the first pages).
                if (
                    "FAQ λŠ” 지원 μ „ ꢁ금증" in page_text
                    or (
                        re.search(r"제\s*4\s*μž₯\.?\s*자주 λ¬»λŠ” 질문", page_text)
                        and "λͺ© μ°¨" not in page_text
                        and page.number > 5
                    )
                ):
                    in_faq_section = True
                if not in_faq_section:
                    continue
                tables = page.find_tables()
                for table in tables:
                    rows = table.extract()
                    for row in rows:
                        # Even if the table is read with many columns, dropping
                        # empty cells leaves header or category/question/answer.
                        cells = [cell for cell in map(clean_text, row) if cell]
                        if not cells:
                            continue
                        # Skip the table header row.
                        if cells == ["ꡬ뢄", "질문", "λ‹΅λ³€"]:
                            continue
                        # A lone cell is the remainder of the previous answer
                        # that spilled onto the next page/row: append it.
                        if len(cells) == 1:
                            if last_item and not cells[0].startswith("KT AIVLE"):
                                last_item["answer"] = f"{last_item['answer']}\n{cells[0]}".strip()
                            continue
                        if len(cells) < 3:
                            continue
                        category = normalize_category(cells[0])
                        question = clean_text(cells[1])
                        answer = clean_text(cells[2])
                        if category not in valid_categories:
                            continue
                        if not question or not answer:
                            continue
                        if question == "질문" or answer == "λ‹΅λ³€":
                            continue
                        item = {
                            "category": category,
                            "question": question,
                            "answer": answer
                        }
                        faq_data.append(item)
                        last_item = item
                # The FAQ ends where chapter 5 starts. An FAQ table may sit
                # above the chapter-5 heading on the same page, so the current
                # page is fully processed before leaving the loop.
                if re.search(r"제\s*5\s*μž₯\.?\s*μ„ λ°°", page_text):
                    break
        finally:
            doc.close()
    except Exception as e:
        # Best-effort by design: report the problem in the UI and return nothing.
        st.warning(f"톡합 λ°±μ„œμ—μ„œ FAQ ν‘œ μΆ”μΆœ 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e}")
        return []
    # De-duplicate: the same question can appear again in a summary FAQ table,
    # so each (whitespace-normalized) question is kept only the first time.
    unique_faq_data = []
    seen_questions = set()
    for item in faq_data:
        normalized_question = re.sub(r"\s+", " ", item["question"]).strip()
        if normalized_question in seen_questions:
            continue
        seen_questions.add(normalized_question)
        unique_faq_data.append(item)
    return unique_faq_data
def save_uploaded_files(uploaded_files):
    """Write each uploaded file into UPLOAD_DIR and return the list of written paths.

    File names are passed through ``safe_filename`` first. An empty or missing
    upload list yields an empty list without touching the disk.
    """
    if not uploaded_files:
        return []
    written = []
    for upload in uploaded_files:
        target = UPLOAD_DIR / safe_filename(upload.name)
        with open(target, "wb") as out:
            out.write(upload.getbuffer())
        written.append(target)
    return written
def load_all_documents(uploaded_files):
    """Collect documents from the bundled white-paper PDF plus any uploaded files.

    Uploaded files are saved to disk first and then loaded according to their
    extension (.pdf, .txt, .csv); other extensions are ignored. A warning is
    shown when the bundled PDF is missing.
    """
    collected = []
    if not PDF_PATH.exists():
        st.warning(f"κΈ°λ³Έ λ°±μ„œ PDFλ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {PDF_PATH}")
    else:
        collected.extend(load_pdf_file(PDF_PATH))
    # The FAQ lives inside the integrated white-paper PDF, so nothing extra is
    # added for it here; PDF_PATH is the only bundled document.
    loaders_by_suffix = {
        ".pdf": load_pdf_file,
        ".txt": load_txt_file,
        ".csv": load_csv_file,
    }
    for saved_path in save_uploaded_files(uploaded_files):
        loader = loaders_by_suffix.get(saved_path.suffix.lower())
        if loader is not None:
            collected.extend(loader(saved_path))
    return collected
def get_upload_signature(uploaded_files):
    """Return a string identifying the current uploads by name and size.

    The result is ``"no_upload"`` when nothing is uploaded, otherwise the
    ``name:size`` pairs joined by ``|`` in upload order.
    """
    if not uploaded_files:
        return "no_upload"
    name_size_pairs = (f"{upload.name}:{upload.size}" for upload in uploaded_files)
    return "|".join(name_size_pairs)
# Parsed FAQ entries, extracted from the white-paper PDF each time the script runs.
# Used by the FAQ page and by the FAQ-based question recommender below.
FAQ_DATA = load_faq_from_pdf(PDF_PATH)
if not FAQ_DATA:
    st.warning("톡합 λ°±μ„œμ—μ„œ FAQ 데이터λ₯Ό λΆˆλŸ¬μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. FAQ ν‘œ ꡬ쑰λ₯Ό ν™•μΈν•˜μ„Έμš”.")
# =========================================================
# 5. RAG construction functions (based on code #1)
# =========================================================
def build_rag(uploaded_files):
    """Build the retrieval pipeline over the white paper and uploaded files.

    Documents are split into 1200-character chunks (200 overlap), embedded
    into a FAISS index and exposed through a top-8 retriever.

    Returns:
        ``(retriever, chain, format_docs)`` where ``chain`` expects the keys
        ``context`` and ``question`` and ``format_docs`` joins document texts.
        When no document is available an error is shown and the script stops.
    """
    source_docs = load_all_documents(uploaded_files)
    if not source_docs:
        st.error("RAG에 μ‚¬μš©ν•  λ¬Έμ„œκ°€ μ—†μŠ΅λ‹ˆλ‹€. κΈ°λ³Έ λ°±μ„œ PDF λ˜λŠ” μ—…λ‘œλ“œ νŒŒμΌμ„ ν™•μΈν•˜μ„Έμš”.")
        st.stop()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1200,
        chunk_overlap=200,
        separators=["\n\n", "\n", ".", " ", ""]
    )
    pieces = text_splitter.split_documents(source_docs)
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    index = FAISS.from_documents(pieces, embedder)
    retriever = index.as_retriever(search_kwargs={"k": 8})
    chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    answer_prompt = ChatPromptTemplate.from_template("""
당신은 AIVLE School λ°±μ„œ, FAQ, μ‚¬μš©μžκ°€ μ—…λ‘œλ“œν•œ 자료λ₯Ό 기반으둜 λ‹΅λ³€ν•˜λŠ” ν•™μŠ΅λ„μš°λ―Έ μ±—λ΄‡μž…λ‹ˆλ‹€.
κ·œμΉ™:
1. λ°˜λ“œμ‹œ [λ¬Έμ„œ λ‚΄μš©]에 κ·Όκ±°ν•΄μ„œ λ‹΅λ³€ν•˜μ„Έμš”.
2. λ¬Έμ„œμ—μ„œ ν™•μΈλ˜μ§€ μ•ŠλŠ” λ‚΄μš©μ€ "λ¬Έμ„œμ—μ„œ ν™•μΈλ˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€."라고 λ‹΅ν•˜μ„Έμš”.
3. ν•œκ΅­μ–΄λ‘œ μΉœμ ˆν•˜κ³  κ°„κ²°ν•˜κ²Œ λ‹΅λ³€ν•˜μ„Έμš”.
4. 질문과 직접 κ΄€λ ¨λœ λ‚΄μš©λ§Œ λ‹΅λ³€ν•˜μ„Έμš”.
[λ¬Έμ„œ λ‚΄μš©]
{context}
[질문]
{question}
""")
    chain = answer_prompt | chat_model | StrOutputParser()

    def format_docs(docs):
        """Join the page contents of ``docs`` with blank lines in between."""
        return "\n\n".join(doc.page_content for doc in docs)

    return retriever, chain, format_docs
def ensure_rag_ready(uploaded_files):
    """(Re)build the RAG objects in session state when missing or when uploads changed.

    The upload signature stored in session state is compared with the current
    one; nothing happens when the retriever and chain exist and it matches.
    """
    session = st.session_state
    signature = get_upload_signature(uploaded_files)
    already_built = (
        session.retriever is not None
        and session.rag_chain is not None
        and session.rag_upload_signature == signature
    )
    if already_built:
        return
    with st.spinner("λ¬Έμ„œλ₯Ό 읽고 AI 검색 인덱슀λ₯Ό μ€€λΉ„ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."):
        new_retriever, new_chain, new_formatter = build_rag(uploaded_files)
        session.retriever = new_retriever
        session.rag_chain = new_chain
        session.format_docs = new_formatter
        session.rag_upload_signature = signature
# =========================================================
# 6. FAQ-based recommended questions (based on code #1)
# =========================================================
@st.cache_resource
def build_faq_retriever():
    """Build a cached top-4 retriever over the FAQ questions.

    Each FAQ question becomes a Document whose metadata carries its category
    and answer. Returns ``None`` when there is no FAQ data.
    """
    question_docs = [
        Document(
            page_content=entry["question"],
            metadata={
                "category": entry["category"],
                "answer": entry["answer"]
            }
        )
        for entry in FAQ_DATA
    ]
    if not question_docs:
        return None
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    store = FAISS.from_documents(question_docs, embedder)
    return store.as_retriever(search_kwargs={"k": 4})
def recommend_questions(user_question):
    """Return up to four distinct FAQ questions similar to ``user_question``.

    Returns an empty list when no FAQ retriever is available or when any step
    fails (the failure is shown as a Streamlit warning).
    """
    try:
        retriever = build_faq_retriever()
        if retriever is None:
            return []
        suggestions = []
        for hit in retriever.invoke(user_question):
            candidate = hit.page_content.strip()
            if not candidate or candidate in suggestions:
                continue
            suggestions.append(candidate)
        return suggestions[:4]
    except Exception as e:
        st.warning(f"μΆ”μ²œμ§ˆλ¬Έ 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e}")
        return []
# =========================================================
# 7. LangGraph question routing (based on code #1)
# =========================================================
class ChatState(TypedDict):
    """State dictionary passed between the LangGraph nodes defined below."""
    question: str  # the user's question text
    route: str  # "aivle" or "general", written by classify_question_node
    context: str  # retrieved document text; "" on the general route
    answer: str  # answer text produced by the selected node
def classify_question_node(state: ChatState) -> ChatState:
    """Ask the LLM whether the question is AIVLE-related and store the route.

    ``state["route"]`` becomes ``"aivle"`` when the upper-cased model output
    contains "AIVLE", otherwise ``"general"``. The same state is returned.
    """
    router_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    router_prompt = ChatPromptTemplate.from_template("""
당신은 μ‚¬μš©μž μ§ˆλ¬Έμ„ λΆ„λ₯˜ν•˜λŠ” λΌμš°ν„°μž…λ‹ˆλ‹€.
μ•„λž˜ 기쀀에 따라 μ§ˆλ¬Έμ„ λ°˜λ“œμ‹œ λ‘˜ 쀑 ν•˜λ‚˜λ‘œλ§Œ λΆ„λ₯˜ν•˜μ„Έμš”.
[AIVLE]
- KT AIVLE School, μ—μ΄λΈ”μŠ€μΏ¨, μ—μ΄λΈ”λŸ¬ κ΄€λ ¨ 질문
- μΆœμ„, 결석, 지각, 쑰퇴, μ™ΈμΆœ, μΆœμ„ 인정 κ΄€λ ¨ 질문
- κ°•μ˜, λ‹€μ‹œλ³΄κΈ°, ꡐ윑 일정, 체크인/체크아웃 κ΄€λ ¨ 질문
- KDT, ν›ˆλ ¨μž₯렀금, λ‚΄μΌλ°°μ›€μΉ΄λ“œ, κ΅­λ―Όμ·¨μ—…μ§€μ›μ œλ„ κ΄€λ ¨ 질문
- ꡐ윑μž₯, λ…ΈνŠΈλΆ, μ½”λ”©λ§ˆμŠ€ν„°μŠ€, μ½”λ”© ν•™μŠ΅ ν”Œλž«νΌ κ΄€λ ¨ 질문
- μ±„μš©μ—°κ³„, 포트폴리였, AX μ±Œλ¦°μ§€ λ“± μ—μ΄λΈ”μŠ€μΏ¨ μ œλ„ κ΄€λ ¨ 질문
- μ‚¬μš©μžκ°€ μ—…λ‘œλ“œν•œ λ°±μ„œ/FAQ/λ¬Έμ„œμ—μ„œ λ‹΅ν•΄μ•Ό ν•  질문
[GENERAL]
- μœ„μ™€ λ¬΄κ΄€ν•œ 일반 지식, μ½”λ”©, μƒν™œ, μŒμ‹, 건강, λ¬Έμ„œ μž‘μ„±, λ²ˆμ—­, 상담 질문
- AIVLE λ¬Έμ„œ κ·Όκ±°κ°€ ν•„μš” μ—†λŠ” 일반 질문
좜λ ₯ κ·œμΉ™:
- AIVLE 관련이면 AIVLE
- 일반 질문이면 GENERAL
- λ‹€λ₯Έ μ„€λͺ… 없이 단어 ν•˜λ‚˜λ§Œ 좜λ ₯
[μ‚¬μš©μž 질문]
{question}
""")
    router_chain = router_prompt | router_model | StrOutputParser()
    raw_label = router_chain.invoke({"question": state["question"]})
    label = raw_label.strip().upper()
    state["route"] = "aivle" if "AIVLE" in label else "general"
    return state
def route_condition(state: ChatState) -> Literal["aivle", "general"]:
    """Return the route stored in the state; used to select the conditional edge."""
    chosen_route = state["route"]
    return chosen_route
def aivle_rag_node(state: ChatState) -> ChatState:
    """Answer the question from retrieved documents using the session's RAG objects.

    Writes the formatted retrieval context and the chain's answer into the
    state and returns it.
    """
    session = st.session_state
    query = state["question"]
    retrieved = session.retriever.invoke(query)
    context = session.format_docs(retrieved)
    answer = session.rag_chain.invoke({"context": context, "question": query})
    # Both fields are written only after the chain call has succeeded.
    state.update({"context": context, "answer": answer})
    return state
def general_llm_node(state: ChatState) -> ChatState:
    """Answer a general question with the LLM alone (no document retrieval).

    Sets ``state["context"]`` to "" and ``state["answer"]`` to the model output.
    """
    assistant_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
    assistant_prompt = ChatPromptTemplate.from_template("""
당신은 μΉœμ ˆν•œ AI ν•™μŠ΅ λ„μš°λ―Έμž…λ‹ˆλ‹€.
μ‚¬μš©μžμ˜ μ§ˆλ¬Έμ— λŒ€ν•΄ ν•œκ΅­μ–΄λ‘œ μžμ—°μŠ€λŸ½κ³  μ΄ν•΄ν•˜κΈ° μ‰½κ²Œ λ‹΅λ³€ν•˜μ„Έμš”.
[μ‚¬μš©μž 질문]
{question}
""")
    assistant_chain = assistant_prompt | assistant_model | StrOutputParser()
    reply = assistant_chain.invoke({"question": state["question"]})
    state.update({"context": "", "answer": reply})
    return state
def build_question_graph():
    """Compile the routing graph: classify, then either the RAG node or the general node, then END."""
    builder = StateGraph(ChatState)
    node_handlers = (
        ("classify", classify_question_node),
        ("aivle_rag", aivle_rag_node),
        ("general_llm", general_llm_node),
    )
    for node_name, handler in node_handlers:
        builder.add_node(node_name, handler)
    builder.set_entry_point("classify")
    # route_condition returns the key that selects the next node.
    route_targets = {"aivle": "aivle_rag", "general": "general_llm"}
    builder.add_conditional_edges("classify", route_condition, route_targets)
    for terminal_node in ("aivle_rag", "general_llm"):
        builder.add_edge(terminal_node, END)
    return builder.compile()
def run_langgraph_answer(question):
    """Run the routing graph for ``question`` and return the resulting state."""
    initial_state = {
        "question": question,
        "route": "",
        "context": "",
        "answer": ""
    }
    compiled_graph = build_question_graph()
    return compiled_graph.invoke(initial_state)
# =========================================================
# 8. Chat extras (based on code #2)
# =========================================================
def copy_button(text, key):
    """Render a small HTML button that copies ``text`` to the clipboard.

    Args:
        text: The string to copy when the button is clicked.
        key: Accepted from call sites; not used by this function.
    """
    # Two layers of quoting are needed: json.dumps turns the text into a valid
    # JavaScript string literal, and html.escape makes the whole handler safe
    # inside the quoted HTML attribute. Without the second step an apostrophe
    # or "&" in the text would end the attribute early or alter the copied text.
    js_literal = json.dumps(text, ensure_ascii=False)
    onclick_js = html.escape(f"navigator.clipboard.writeText({js_literal})", quote=True)
    components.html(
        f"""
<button onclick='{onclick_js}'
style="
margin-top:4px;
padding:6px 12px;
border-radius:8px;
border:1px solid #d1d5db;
background:white;
cursor:pointer;
font-size:13px;
">
πŸ“‹ 볡사
</button>
""",
        height=40
    )
def generate_tts(text, filename):
    """Synthesize ``text`` to ``TEMP_DIR/<filename>.mp3`` and return that path as a string."""
    output_path = str(TEMP_DIR / f"{filename}.mp3")
    speech = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice="nova",
        input=text
    )
    speech.stream_to_file(output_path)
    return output_path
def transcribe_audio(audio_file):
    """Send ``audio_file`` to the transcription API and return the recognized text."""
    result = client.audio.transcriptions.create(file=audio_file, model="whisper-1")
    return result.text
def render_text_for_html(text):
    """HTML-escape ``text`` and turn each newline into a ``<br>`` tag."""
    escaped = html.escape(text)
    return "<br>".join(escaped.split("\n"))
def answer_question(question):
    """Append the question and its generated answer to the current chat.

    The answer is prefixed with a label showing which route produced it.
    Recommended questions are refreshed for AIVLE-routed questions and cleared
    otherwise.
    """
    history = get_current_messages()
    history.append({"role": "user", "content": question})
    update_chat_title(question)
    outcome = run_langgraph_answer(question)
    answer_text = outcome["answer"]
    is_aivle = outcome["route"] == "aivle"
    label = "πŸ“š FAQ/λ°±μ„œ 기반 λ‹΅λ³€" if is_aivle else "πŸ’¬ 일반 AI λ‹΅λ³€"
    history.append({"role": "assistant", "content": f"{label}\n\n{answer_text}"})
    st.session_state.recommended_questions = (
        recommend_questions(question) if is_aivle else []
    )
# =========================================================
# 9. Sidebar (based on code #1)
# =========================================================
# NOTE(review): nesting in this section was reconstructed from a copy of the
# file whose indentation was lost - confirm against the original layout.
# Stays None unless the file uploader below returns something.
uploaded_files = None
with st.sidebar:
    # Branding: the logo image when present, otherwise a text title.
    if LOGO_PATH.exists():
        st.image(str(LOGO_PATH), width=240)
    else:
        st.markdown('<div class="sidebar-title">AIVLE ν•™μŠ΅λ„μš°λ―Έ</div>', unsafe_allow_html=True)
    st.markdown(
        '<div class="sidebar-subtitle">λ°±μ„œμ™€ μ—…λ‘œλ“œ 자료λ₯Ό 기반으둜<br>ν•™μŠ΅ μ§ˆλ¬Έμ— λ‹΅λ³€ν•˜λŠ” AI μ±—λ΄‡μž…λ‹ˆλ‹€.</div>',
        unsafe_allow_html=True
    )
    st.markdown('<div class="sidebar-section-title">πŸ‘€ 계정</div>', unsafe_allow_html=True)
    if not st.session_state.logged_in:
        # Login form. The inputs are only checked for being non-empty; no
        # credential verification is visible in this file.
        with st.form("login_form", clear_on_submit=False):
            login_name = st.text_input("이름", placeholder="이름을 μž…λ ₯ν•˜μ„Έμš”")
            login_id = st.text_input("아이디", placeholder="아이디λ₯Ό μž…λ ₯ν•˜μ„Έμš”")
            login_pw = st.text_input("λΉ„λ°€λ²ˆν˜Έ", type="password", placeholder="λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•˜μ„Έμš”")
            login_btn = st.form_submit_button("둜그인", use_container_width=True)
        if login_btn:
            if login_name.strip() and login_id.strip() and login_pw.strip():
                st.session_state.logged_in = True
                st.session_state.user_name = login_name.strip()
                st.session_state.user_id = login_id.strip()
                # Make sure a chat exists as soon as the user is logged in.
                if st.session_state.current_chat_id is None:
                    create_new_chat()
                st.rerun()
            else:
                st.warning("이름, 아이디, λΉ„λ°€λ²ˆν˜Έλ₯Ό λͺ¨λ‘ μž…λ ₯ν•˜μ„Έμš”.")
    else:
        # Logged-in account card; the user name is HTML-escaped before embedding.
        st.markdown(
            f"""
<div class="account-card">
<div class="account-name">{html.escape(st.session_state.user_name)}λ‹˜</div>
<div class="account-status">둜그인 쀑</div>
</div>
""",
            unsafe_allow_html=True
        )
        if st.button("λ‘œκ·Έμ•„μ›ƒ", use_container_width=True):
            # Logout clears the identity fields; stored chats are left in place.
            st.session_state.logged_in = False
            st.session_state.user_name = ""
            st.session_state.user_id = ""
            st.session_state.page = "home"
            st.rerun()
    st.divider()
    # Navigation buttons are disabled until the user is logged in.
    if st.button("οΌ‹ μƒˆ λŒ€ν™” μ‹œμž‘", use_container_width=True, disabled=not st.session_state.logged_in):
        create_new_chat()
        st.rerun()
    col_home, col_faq = st.columns(2)
    with col_home:
        if st.button("🏠 ν™ˆ", use_container_width=True, disabled=not st.session_state.logged_in):
            st.session_state.page = "home"
            st.rerun()
    with col_faq:
        if st.button("❔ FAQ", use_container_width=True, disabled=not st.session_state.logged_in):
            st.session_state.page = "faq"
            st.rerun()
    st.markdown('<div class="sidebar-section-title">πŸ“Ž 자료 μΆ”κ°€</div>', unsafe_allow_html=True)
    with st.expander("파일 μ—…λ‘œλ“œ", expanded=False):
        # Extra documents (PDF/TXT/CSV) that are added to the RAG index.
        uploaded_files = st.file_uploader(
            "PDF, TXT, CSV νŒŒμΌμ„ μΆ”κ°€ν•  수 μžˆμŠ΅λ‹ˆλ‹€.",
            type=["pdf", "txt", "csv"],
            accept_multiple_files=True,
            disabled=not st.session_state.logged_in
        )
        if uploaded_files:
            st.success(f"{len(uploaded_files)}개 파일이 μΆ”κ°€λ˜μ—ˆμŠ΅λ‹ˆλ‹€.")
        else:
            st.markdown(
                '<div class="sidebar-help">μ—…λ‘œλ“œν•œ νŒŒμΌμ€ κΈ°μ‘΄ λ°±μ„œμ™€ ν•¨κ»˜ AI 닡변에 ν™œμš©λ©λ‹ˆλ‹€.</div>',
                unsafe_allow_html=True
            )
    st.divider()
    st.markdown('<div class="sidebar-section-title">πŸ’¬ λŒ€ν™” 기둝</div>', unsafe_allow_html=True)
    if not st.session_state.logged_in:
        st.caption("둜그인 ν›„ λŒ€ν™” 기둝을 μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€.")
    elif st.session_state.chats:
        # Reverse insertion order so the newest chat is listed first.
        chat_items = list(st.session_state.chats.items())[::-1]
        for chat_id, chat_info in chat_items:
            title = chat_info["title"]
            created_at = chat_info["created_at"]
            col_chat, col_delete = st.columns([5, 1])
            with col_chat:
                if st.button(f"πŸ’¬ {title}", key=f"chat_{chat_id}", use_container_width=True):
                    st.session_state.current_chat_id = chat_id
                    st.session_state.page = "home"
                    st.rerun()
            with col_delete:
                if st.button("πŸ—‘", key=f"delete_{chat_id}", use_container_width=True):
                    delete_chat(chat_id)
                    st.rerun()
            st.markdown(
                f"<div class='history-caption'>{created_at}</div>",
                unsafe_allow_html=True
            )
    else:
        st.caption("아직 λŒ€ν™” 기둝이 μ—†μŠ΅λ‹ˆλ‹€.")
# =========================================================
# 10. Block all features before login (based on code #1)
# =========================================================
# Show a lock notice and halt the script so nothing below runs for guests.
if not st.session_state.logged_in:
    st.markdown("""
<div class="login-lock-box">
<div class="login-lock-title">πŸ”’ 둜그인이 ν•„μš”ν•©λ‹ˆλ‹€</div>
<div class="login-lock-sub">
μ‚¬μ΄λ“œλ°”μ—μ„œ 이름, 아이디, λΉ„λ°€λ²ˆν˜Έλ₯Ό μž…λ ₯ν•΄μ•Ό<br>
챗봇, FAQ, 파일 μ—…λ‘œλ“œ κΈ°λŠ₯을 μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€.
</div>
</div>
""", unsafe_allow_html=True)
    st.stop()
# =========================================================
# 11. Prepare RAG after login
# =========================================================
# Builds (or reuses) the retriever and chain for the current set of uploads.
ensure_rag_ready(uploaded_files)
# =========================================================
# 12. FAQ page (based on code #1)
# =========================================================
def render_faq_page():
    """Render the FAQ page: one tab for all entries plus one tab per category.

    Each entry is an expander titled with its category and question that
    reveals the answer. Shows a warning instead when no FAQ data was loaded.
    """
    st.markdown("## ❔ FAQ")
    st.caption("자주 λ¬»λŠ” μ§ˆλ¬Έμ„ μΉ΄ν…Œκ³ λ¦¬λ³„λ‘œ 확인할 수 μžˆμŠ΅λ‹ˆλ‹€.")
    if not FAQ_DATA:
        st.warning("톡합 λ°±μ„œμ—μ„œ FAQ 데이터λ₯Ό λΆˆλŸ¬μ˜€μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€. FAQ ν‘œ ꡬ쑰λ₯Ό ν™•μΈν•˜μ„Έμš”.")
        return
    all_label = "전체"
    tab_labels = [all_label] + sorted({entry["category"] for entry in FAQ_DATA})
    for label, tab in zip(tab_labels, st.tabs(tab_labels)):
        with tab:
            if label == all_label:
                entries = FAQ_DATA
            else:
                entries = [entry for entry in FAQ_DATA if entry["category"] == label]
            if not entries:
                st.info("ν•΄λ‹Ή μΉ΄ν…Œκ³ λ¦¬μ˜ FAQκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
                continue
            for entry in entries:
                with st.expander(f"[{entry['category']}] {entry['question']}"):
                    st.write(entry["answer"])
# =========================================================
# 13. Home / chat page: chat UI from code #2, recommended questions from code #1
# =========================================================
def render_home_page():
    """Render the home page: banner, recommended questions, chat history and inputs.

    Text input and voice input both end in ``answer_question`` followed by a
    rerun. A voice recording is processed only once: its digest is remembered
    in ``st.session_state["last_audio_digest"]``.
    """
    # Local import: only the voice-input branch at the bottom needs it.
    import hashlib

    if IMAGE_BANNER_PATH.exists():
        st.image(str(IMAGE_BANNER_PATH), use_container_width=True)
    else:
        st.markdown(f"""
<div class="hero-box">
<div>
<div class="hero-title">μ•ˆλ…•ν•˜μ„Έμš”, {html.escape(st.session_state.user_name)}λ‹˜! 😊<br>무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?</div>
<div class="hero-sub">AIVLE λ°±μ„œμ™€ μ—…λ‘œλ“œν•œ 자료λ₯Ό 기반으둜 λ‹΅λ³€λ“œλ¦΄κ²Œμš”.</div>
</div>
<div class="robot">πŸ€–</div>
</div>
""", unsafe_allow_html=True)
    # -------------------------
    # FAQ-based recommended questions
    # -------------------------
    st.markdown('<div class="section-title">μΆ”μ²œ 질문</div>', unsafe_allow_html=True)
    rec_questions = st.session_state.recommended_questions
    if rec_questions:
        cols = st.columns(2)
        for i, q in enumerate(rec_questions):
            with cols[i % 2]:
                if st.button(q, key=f"rec_{i}_{q}", use_container_width=True):
                    answer_question(q)
                    st.rerun()
    else:
        st.caption("AIVLE κ΄€λ ¨ μ§ˆλ¬Έμ„ μž…λ ₯ν•˜λ©΄ FAQ 기반 μΆ”μ²œ 질문이 ν‘œμ‹œλ©λ‹ˆλ‹€.")
    # -------------------------
    # Card-style chat messages
    # -------------------------
    messages = get_current_messages()
    st.markdown('<div class="chat-wrap">', unsafe_allow_html=True)
    if not messages:
        st.markdown("""
<div style="min-height: 420px;">
<div class="assistant-name">πŸ€– AIVLE λ„μš°λ―Έ</div>
<div class="assistant-card">
μ•ˆλ…•ν•˜μ„Έμš”! AIVLE λ°±μ„œμ™€ FAQ, μ—…λ‘œλ“œν•œ λ¬Έμ„œλ₯Ό 기반으둜 μ§ˆλ¬Έμ— λ‹΅λ³€λ“œλ¦΄κ²Œμš”.<br>
일반 μ§ˆλ¬Έμ€ λ¬Έμ„œ 검색 없이 일반 AI λ‹΅λ³€μœΌλ‘œ μ•ˆλ‚΄ν•©λ‹ˆλ‹€.
</div>
</div>
""", unsafe_allow_html=True)
    for idx, msg in enumerate(messages):
        # Message text is escaped before being embedded in raw HTML.
        content_html = render_text_for_html(msg["content"])
        if msg["role"] == "user":
            st.markdown(
                f'<div class="user-bubble">{content_html}</div>',
                unsafe_allow_html=True
            )
        else:
            st.markdown(
                f"""
<div class="assistant-name">πŸ€– AIVLE λ„μš°λ―Έ</div>
<div class="assistant-card">{content_html}</div>
""",
                unsafe_allow_html=True
            )
            col_copy, col_tts = st.columns([1, 5])
            with col_copy:
                copy_button(msg["content"], key=f"copy_{idx}")
            with col_tts:
                if st.button("πŸ”Š μŒμ„±μœΌλ‘œ λ“£κΈ°", key=f"tts_{idx}"):
                    with st.spinner("μŒμ„±μ„ μƒμ„±ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."):
                        audio_path = generate_tts(msg["content"], f"audio_{idx}_{st.session_state.current_chat_id}")
                    st.audio(audio_path)
    st.markdown('</div>', unsafe_allow_html=True)
    # -------------------------
    # Text + voice input
    # -------------------------
    col_text, col_audio = st.columns([5, 1])
    with col_text:
        user_input = st.chat_input("λ©”μ‹œμ§€λ₯Ό μž…λ ₯ν•˜μ„Έμš”...")
    with col_audio:
        audio_file = st.audio_input(
            "🎀 μŒμ„± 질문",
            label_visibility="collapsed"
        )
    if user_input:
        answer_question(user_input)
        st.rerun()
    if audio_file is not None:
        # The audio widget keeps returning the same recording on every rerun.
        # Without this guard the rerun below would transcribe and answer the
        # same recording again, endlessly. getvalue() does not move the file
        # position, so the file can still be sent for transcription afterwards.
        audio_digest = hashlib.sha256(audio_file.getvalue()).hexdigest()
        if st.session_state.get("last_audio_digest") != audio_digest:
            st.session_state["last_audio_digest"] = audio_digest
            with st.spinner("μŒμ„±μ„ λΆ„μ„ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€..."):
                question = transcribe_audio(audio_file)
            answer_question(question)
            st.rerun()
# =========================================================
# 14. Page routing
# =========================================================
# Any page value other than "faq" falls back to the home/chat page.
if st.session_state.page == "faq":
    render_faq_page()
else:
    render_home_page()