gswrag_test / src /streamlit_app.py
wldud7568's picture
Update src/streamlit_app.py
f3b0be8 verified
import streamlit as st
import os
import json
import re
import numpy as np
from typing import List, Dict, Tuple, Optional
from pathlib import Path
import logging
from sentence_transformers import SentenceTransformer
import faiss
import json
from rank_bm25 import BM25Okapi
# Basic logging configuration: INFO level on the root logger, plus a
# module-level logger used by the searcher for load/creation diagnostics.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# νŽ˜μ΄μ§€ μ„€μ •
st.set_page_config(
page_title="ν•˜μ΄λΈŒλ¦¬λ“œ μ°¨λŸ‰ μ •λΉ„ 검색 μ‹œμŠ€ν…œ",
page_icon="πŸ”§",
layout="wide",
initial_sidebar_state="expanded"
)
# CSS styles injected once at import time. The class names defined here
# (main-header, search-container, result-card, score-badge, category-badge,
# content-text, metric-card) are the ones referenced by the raw HTML snippets
# that main() emits through st.markdown(..., unsafe_allow_html=True).
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1f4e79;
text-align: center;
margin-bottom: 2rem;
font-weight: bold;
}
.search-container {
background-color: #f8f9fa;
padding: 2rem;
border-radius: 10px;
margin-bottom: 2rem;
border-left: 5px solid #1f4e79;
}
.result-card {
background-color: white;
padding: 1.5rem;
border-radius: 8px;
margin-bottom: 1rem;
border: 1px solid #dee2e6;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.score-badge {
background-color: #e3f2fd;
color: #1565c0;
padding: 0.25rem 0.75rem;
border-radius: 15px;
font-size: 0.8rem;
font-weight: bold;
}
.category-badge {
background-color: #f3e5f5;
color: #7b1fa2;
padding: 0.25rem 0.75rem;
border-radius: 15px;
font-size: 0.8rem;
margin-right: 0.5rem;
}
.content-text {
background-color: #f8f9fa;
padding: 1rem;
border-radius: 5px;
border-left: 3px solid #28a745;
margin-top: 1rem;
line-height: 1.6;
}
.metric-card {
background-color: #e8f5e8;
padding: 1rem;
border-radius: 5px;
text-align: center;
margin: 0.5rem;
}
</style>
""", unsafe_allow_html=True)
# Simple parts vocabulary (used here in place of the real vocab.py).
# HybridMultiCollectionSearcher._extract_nouns_and_verbs pads every occurrence
# of these names with spaces so each one becomes its own whitespace token.
# NOTE(review): the literals below look like UTF-8 Korean text decoded with
# the wrong codec -- confirm the file's encoding before relying on matches
# against real user input.
PARTS = [
    "μˆ˜λ™λ³€μ†κΈ°", "클러치", "브레이크", "μ—”μ§„", "타이어", "배터리",
    "였일", "ν•„ν„°", "벨트", "호슀", "νŽŒν”„", "μ„Όμ„œ", "νŠΈλžœμŠ€λ―Έμ…˜",
    "λ””μŠ€ν¬", "νŒ¨λ“œ", "슈", "λ‘œν„°", "캘리퍼", "λ§ˆμŠ€ν„°μ‹€λ¦°λ”"
]
# Simple system -> specific-parts mapping (used here in place of the real
# parts_config.py). Keys are system names; each value lists the part names
# belonging to that system. Read by get_specific_parts_for_system() and
# get_all_specific_parts().
SYSTEM_PARTS_MAP = {
    "μˆ˜λ™λ³€μ†κΈ°": ["클러치", "변속기", "λ“œλΌμ΄λΈŒμƒ€ν”„νŠΈ", "λ””νΌλ Œμ…œ"],
    "μ—”μ§„": ["ν”ΌμŠ€ν†€", "싀린더", "ν¬λž­ν¬μƒ€ν”„νŠΈ", "μΊ μƒ€ν”„νŠΈ"],
    "브레이크": ["λΈŒλ ˆμ΄ν¬νŒ¨λ“œ", "λΈŒλ ˆμ΄ν¬λ””μŠ€ν¬", "캘리퍼", "λ§ˆμŠ€ν„°μ‹€λ¦°λ”"]
}
def get_specific_parts_for_system(system_name: str) -> list:
    """Return the parts registered for ``system_name`` in SYSTEM_PARTS_MAP.

    An unknown system yields a new empty list. For a known system the list
    stored in the map itself is returned (not a copy).
    """
    try:
        return SYSTEM_PARTS_MAP[system_name]
    except KeyError:
        return []
def get_all_specific_parts() -> list:
    """Return every distinct part name listed in SYSTEM_PARTS_MAP.

    Duplicates across systems are collapsed. The result keeps first-seen
    order (systems in map order, parts in list order) so the output is the
    same on every run; a plain ``set`` round-trip would reorder the names
    from one process to the next because of string hash randomisation.
    """
    return list(dict.fromkeys(
        part
        for parts in SYSTEM_PARTS_MAP.values()
        for part in parts
    ))
class SimpleMecab:
    """Minimal stand-in for a MeCab morphological analyzer."""

    def pos(self, text):
        """Tag whitespace-separated tokens of ``text`` as nouns.

        No real morphological analysis happens here: the text is split on
        whitespace, single-character tokens are dropped, and every remaining
        token is paired with the fixed tag ``'NN'``.
        """
        tagged = []
        for token in text.split():
            if len(token) <= 1:
                continue
            tagged.append((token, 'NN'))
        return tagged
class HybridMultiCollectionSearcher:
    """Hybrid multi-collection searcher (dense vector + BM25 keyword search).

    Every collection is a dict holding a FAISS index plus ``metadata_list``
    and ``content_dict``; a BM25 index with the same collection name lives in
    ``bm25_indexes``. A query is scored against both indexes, each score set
    is min-max normalised, and the two are mixed with ``alpha`` before a
    small heuristic boost is added.
    """

    # Keys search_collection() reads from a collection besides 'faiss_index'.
    _REQUIRED_COLLECTION_KEYS = ('metadata_list', 'content_dict')

    # Keywords that earn a boost when they occur in both the query and the
    # chunk's content_type (presumably the Korean words for removal /
    # installation / inspection -- confirm against the file's real encoding).
    _CONTENT_TYPE_KEYWORDS = ('νƒˆκ±°', 'μž₯μ°©', '점검')

    def __init__(self, model_name: str = "upskyy/bge-m3-korean", target_system: Optional[str] = None):
        """Create an empty searcher.

        Args:
            model_name: SentenceTransformer model identifier used for the
                dense embeddings.
            target_system: Optional system name; only stored on the instance.
        """
        self.model = None  # loaded lazily on first use
        self.collections = {}
        self.bm25_indexes = {}
        self.target_system = target_system
        self.mecab = SimpleMecab()  # simple whitespace-based analyzer
        self.model_name = model_name

    @staticmethod
    @st.cache_resource
    def _load_cached_model(model_name: str):
        """Build the embedding model for ``model_name`` (cached per name).

        The cache key is the model name, so searchers configured with
        different models never share an entry. Failures propagate as
        exceptions, so a failed load is not stored in the cache.
        """
        return SentenceTransformer(model_name)

    def load_model(self):
        """Load the embedding model through the Streamlit resource cache.

        Returns:
            The SentenceTransformer instance, or ``None`` when loading failed
            (the error is also shown in the UI).
        """
        try:
            return self._load_cached_model(self.model_name)
        except Exception as e:
            st.error(f"λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
            return None

    def _extract_nouns_and_verbs(self, text: str) -> str:
        """Reduce ``text`` to a space-joined string of meaningful tokens.

        Known part names from PARTS are first padded with spaces so they
        become separate tokens, then the analyzer output is filtered to
        tokens longer than one character. If anything fails, the text as
        processed so far is returned unchanged (best effort).
        """
        try:
            # Give part names priority: isolate them as standalone tokens.
            for part in PARTS:
                text = text.replace(part, f" {part} ")
            morphs = self.mecab.pos(text)
            meaningful_words = [
                word for word, _tag in morphs
                if len(word) > 1 and not word.isspace()
            ]
            return ' '.join(meaningful_words)
        except Exception as e:
            # Deliberate best effort: keep searching with the raw text, but
            # leave a trace instead of failing silently.
            logger.warning("Token extraction failed; using raw text: %s", e)
            return text

    def _normalize_text_for_matching(self, text: str) -> str:
        """Lower-case ``text`` and strip every '.' character."""
        return text.lower().replace('.', '')

    def _normalize_scores(self, scores: np.ndarray) -> np.ndarray:
        """Min-max normalise ``scores`` into the 0-1 range.

        An empty input stays empty; when all values are equal every score
        becomes 0.5 (there is no spread to scale by).
        """
        scores = np.array(scores)
        if len(scores) == 0:
            return np.ones_like(scores) * 0.5
        lowest, highest = scores.min(), scores.max()
        if highest == lowest:
            return np.ones_like(scores) * 0.5
        return (scores - lowest) / (highest - lowest)

    def _calculate_boost_score(self, original_query: str, processed_query: str, metadata: Dict, content: str) -> float:
        """Compute a simple additive boost for one candidate chunk.

        +0.5 for each work-type keyword found in both the query and the
        chunk's ``content_type``; +0.3 when any whitespace token of the query
        occurs inside the chunk's ``vehicle_info['system']``. Missing or
        ``None`` metadata fields count as empty. ``processed_query`` and
        ``content`` are accepted for interface compatibility but not used.
        """
        boost_score = 0.0
        query_lower = original_query.lower()

        # Content-type matching.
        content_type = metadata.get('content_type') or ''
        for keyword in self._CONTENT_TYPE_KEYWORDS:
            if keyword in query_lower and keyword in content_type:
                boost_score += 0.5

        # System matching.
        vehicle_info = metadata.get('vehicle_info') or {}
        system = vehicle_info.get('system') or ''
        if system and any(word in system.lower() for word in query_lower.split()):
            boost_score += 0.3
        return boost_score

    def create_sample_collection(self, collection_name: str):
        """Build an in-memory collection from three hard-coded sample chunks.

        Loads the embedding model if necessary, embeds the sample search
        texts, and registers a FAISS inner-product index and a BM25 index
        under ``collection_name``.

        Returns:
            ``True`` on success, ``False`` when the model is unavailable or
            any step fails (the failure is logged).
        """
        try:
            if self.model is None:
                self.model = self.load_model()
            if self.model is None:
                return False

            # Sample data
            sample_data = [
                {
                    'chunk_id': 'sample_001',
                    'content': 'μˆ˜λ™λ³€μ†κΈ° νƒˆκ±° μ‹œμ—λŠ” λ¨Όμ € 엔진을 μ •μ§€ν•˜κ³  변속기 μ˜€μΌμ„ λ°°μΆœν•©λ‹ˆλ‹€. 클러치λ₯Ό λΆ„λ¦¬ν•œ ν›„ 변속기λ₯Ό νƒˆκ±°ν•©λ‹ˆλ‹€.',
                    'metadata': {
                        'chunk_id': 'sample_001',
                        'content_type': 'νƒˆκ±°λ°©λ²•',
                        'main_topic': 'μˆ˜λ™λ³€μ†κΈ° νƒˆκ±°',
                        'vehicle_info': {'system': 'μˆ˜λ™λ³€μ†κΈ°', 'model': 'μ—μ–΄λ‘œμ‹œν‹°'},
                        'category_levels': ['변속기', 'μˆ˜λ™λ³€μ†κΈ°', 'νƒˆκ±°λ°©λ²•'],
                        'extracted_components': ['변속기', '클러치']
                    }
                },
                {
                    'chunk_id': 'sample_002',
                    'content': 'μˆ˜λ™λ³€μ†κΈ° μž₯착은 νƒˆκ±°μ˜ μ—­μˆœμœΌλ‘œ μ§„ν–‰ν•©λ‹ˆλ‹€. 변속기λ₯Ό μ •ν™•ν•œ μœ„μΉ˜μ— κ³ μ •ν•˜κ³  클러치λ₯Ό μ—°κ²°ν•©λ‹ˆλ‹€.',
                    'metadata': {
                        'chunk_id': 'sample_002',
                        'content_type': 'μž₯착방법',
                        'main_topic': 'μˆ˜λ™λ³€μ†κΈ° μž₯μ°©',
                        'vehicle_info': {'system': 'μˆ˜λ™λ³€μ†κΈ°', 'model': 'μ—μ–΄λ‘œμ‹œν‹°'},
                        'category_levels': ['변속기', 'μˆ˜λ™λ³€μ†κΈ°', 'μž₯착방법'],
                        'extracted_components': ['변속기', '클러치']
                    }
                },
                {
                    'chunk_id': 'sample_003',
                    'content': '변속기 였일 점검 μ‹œ 였일 레벨과 였일 μƒνƒœλ₯Ό ν™•μΈν•©λ‹ˆλ‹€. κ·œμ •λŸ‰μ€ 2.5L이며 였일 μ˜¨λ„λŠ” 80Β°Cμ—μ„œ μΈ‘μ •ν•©λ‹ˆλ‹€.',
                    'metadata': {
                        'chunk_id': 'sample_003',
                        'content_type': 'μ κ²€μ ˆμ°¨',
                        'main_topic': '였일 점검',
                        'vehicle_info': {'system': 'μˆ˜λ™λ³€μ†κΈ°', 'model': 'μ—μ–΄λ‘œμ‹œν‹°'},
                        'category_levels': ['변속기', 'μˆ˜λ™λ³€μ†κΈ°', 'μ κ²€μ ˆμ°¨'],
                        'extracted_components': ['였일']
                    }
                }
            ]

            # Build the search text for every chunk.
            search_texts = []
            metadata_list = []
            content_dict = {}
            for data in sample_data:
                metadata = data['metadata']
                content = data['content']
                # Search text = content type + topic + category path + body.
                search_components = [
                    metadata.get('content_type', ''),
                    metadata.get('main_topic', ''),
                    ' '.join(metadata.get('category_levels', [])),
                    content
                ]
                search_text = self._extract_nouns_and_verbs(' '.join(search_components))
                search_texts.append(search_text)
                metadata_list.append(metadata)
                content_dict[metadata['chunk_id']] = content

            # Dense embeddings. Convert to contiguous float32 *before*
            # normalising, since normalize_L2 works in place on that layout.
            embeddings = np.ascontiguousarray(
                self.model.encode(search_texts, show_progress_bar=False),
                dtype=np.float32,
            )

            # FAISS index: inner product over unit vectors.
            faiss.normalize_L2(embeddings)
            faiss_index = faiss.IndexFlatIP(embeddings.shape[1])
            faiss_index.add(embeddings)

            # BM25 index over whitespace tokens of the same search texts.
            tokenized_docs = [text.split() for text in search_texts]
            bm25_index = BM25Okapi(tokenized_docs)

            # Register the collection.
            self.collections[collection_name] = {
                'metadata_list': metadata_list,
                'content_dict': content_dict,
                'search_texts': search_texts,
                'faiss_index': faiss_index
            }
            self.bm25_indexes[collection_name] = bm25_index
            return True
        except Exception as e:
            logger.error(f"μƒ˜ν”Œ μ»¬λ ‰μ…˜ 생성 μ‹€νŒ¨: {e}")
            return False

    def load_collections(self, save_dir: str = "./saved_collections") -> bool:
        """Load saved hybrid collections (FAISS + BM25) without pickle.

        Every sub-directory of ``save_dir`` is treated as one collection
        named after the directory and must contain ``faiss.index``,
        ``bm25_tokens.json`` (tokenised documents; the BM25 index is rebuilt
        from them) and ``metadata.json`` (merged into the collection dict).
        Collections that are incomplete or fail to load are logged and
        skipped.

        NOTE(review): this body had no ``def`` line in the file, so the
        method name and the default directory are assumptions based on the
        usage text shown by the app -- confirm against the original project.

        Args:
            save_dir: Directory holding one sub-directory per collection.

        Returns:
            ``True`` if at least one collection was loaded, else ``False``.
        """
        save_dir = Path(save_dir)
        if not save_dir.exists():
            logger.warning(f"μ»¬λ ‰μ…˜ 디렉토리가 μ‘΄μž¬ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€: {save_dir}")
            return False

        loaded_collections = []
        for collection_dir in save_dir.iterdir():
            if not collection_dir.is_dir():
                continue
            collection_name = collection_dir.name
            try:
                # 1. FAISS index
                faiss_path = collection_dir / "faiss.index"
                if not faiss_path.exists():
                    logger.warning(f"FAISS μΈλ±μŠ€κ°€ μ—†μŠ΅λ‹ˆλ‹€: {faiss_path}")
                    continue
                faiss_index = faiss.read_index(str(faiss_path))

                # 2. BM25 token data (JSON); the index is rebuilt from it.
                bm25_tokens_path = collection_dir / "bm25_tokens.json"
                if not bm25_tokens_path.exists():
                    logger.warning(f"BM25 토큰 데이터가 μ—†μŠ΅λ‹ˆλ‹€: {bm25_tokens_path}")
                    continue
                with open(bm25_tokens_path, 'r', encoding='utf-8') as f:
                    tokenized_docs = json.load(f)
                bm25_index = BM25Okapi(tokenized_docs)

                # 3. Metadata (JSON)
                metadata_path = collection_dir / "metadata.json"
                if not metadata_path.exists():
                    logger.warning(f"메타데이터가 μ—†μŠ΅λ‹ˆλ‹€: {metadata_path}")
                    continue
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    save_data = json.load(f)

                # Fail here rather than with a KeyError on the first search.
                missing = [
                    key for key in self._REQUIRED_COLLECTION_KEYS
                    if key not in save_data
                ]
                if missing:
                    logger.warning(
                        "Collection '%s' skipped: metadata.json lacks %s",
                        collection_name, missing,
                    )
                    continue

                # Restore the collection.
                self.collections[collection_name] = {
                    'faiss_index': faiss_index,
                    **save_data
                }
                self.bm25_indexes[collection_name] = bm25_index
                loaded_collections.append(collection_name)
                logger.info(f"μ»¬λ ‰μ…˜ '{collection_name}' λ‘œλ“œ μ™„λ£Œ")
            except Exception as e:
                logger.error(f"μ»¬λ ‰μ…˜ '{collection_name}' λ‘œλ“œ μ‹€νŒ¨: {e}")
                continue

        if loaded_collections:
            logger.info(f"ν•˜μ΄λΈŒλ¦¬λ“œ μ»¬λ ‰μ…˜ λ‘œλ“œ μ™„λ£Œ: {loaded_collections}")
            return True
        logger.error("λ‘œλ“œλœ μ»¬λ ‰μ…˜μ΄ μ—†μŠ΅λ‹ˆλ‹€.")
        return False

    def list_collections(self) -> List[str]:
        """Return the names of all registered collections."""
        return list(self.collections.keys())

    def search_collection(self, collection_name: str, query: str, top_k: int = 5, alpha: float = 0.7) -> List[Dict]:
        """Run a hybrid search against one collection.

        Args:
            collection_name: Name of a registered collection.
            query: Free-text user query.
            top_k: Maximum number of results to return.
            alpha: Weight of the normalised dense score; the normalised BM25
                score gets ``1 - alpha``.

        Returns:
            Up to ``top_k`` result dicts sorted by descending
            ``hybrid_score``. Empty when the collection is unknown or empty,
            ``top_k`` is not positive, or the model cannot be loaded.
        """
        if collection_name not in self.collections or top_k <= 0:
            return []
        if self.model is None:
            self.model = self.load_model()
        if self.model is None:
            return []

        collection = self.collections[collection_name]
        faiss_index = collection['faiss_index']
        metadata_list = collection['metadata_list']
        content_dict = collection['content_dict']
        bm25_index = self.bm25_indexes[collection_name]

        # Over-fetch 3x so boosting can re-rank, but never ask for more
        # neighbours than exist; k == 0 is not a valid search.
        search_k = min(len(metadata_list), faiss_index.ntotal, top_k * 3)
        if search_k <= 0:
            return []

        # Query processing
        processed_query = self._extract_nouns_and_verbs(query)

        # Dense search (float32 conversion must precede the in-place
        # normalisation).
        query_embedding = np.ascontiguousarray(
            self.model.encode([processed_query]), dtype=np.float32
        )
        faiss.normalize_L2(query_embedding)
        dense_similarities, dense_indices = faiss_index.search(query_embedding, search_k)

        # Keyword search: one BM25 score per document in the collection.
        sparse_scores = bm25_index.get_scores(processed_query.split())

        # Drop unfilled (-1) or out-of-range hits *before* normalising so
        # placeholder similarities cannot distort the 0-1 scaling.
        hit_indices = dense_indices[0]
        hit_similarities = dense_similarities[0]
        usable = (hit_indices >= 0) & (hit_indices < len(metadata_list))
        hit_indices = hit_indices[usable]
        hit_similarities = hit_similarities[usable]

        # Score normalisation
        dense_scores_norm = self._normalize_scores(hit_similarities)
        sparse_scores_norm = self._normalize_scores(sparse_scores)

        # Result assembly
        results = []
        for dense_score, similarity, idx in zip(dense_scores_norm, hit_similarities, hit_indices):
            idx = int(idx)
            metadata = metadata_list[idx]
            chunk_id = metadata['chunk_id']
            content = content_dict.get(chunk_id, '')

            # Plain floats keep the result dict free of numpy scalar types.
            dense_score = float(dense_score)
            sparse_score = float(sparse_scores_norm[idx]) if idx < len(sparse_scores_norm) else 0.0
            boost_score = self._calculate_boost_score(query, processed_query, metadata, content)
            hybrid_score = alpha * dense_score + (1 - alpha) * sparse_score + boost_score

            category_levels = metadata.get('category_levels', [])
            results.append({
                'chunk_id': chunk_id,
                'content': content,
                'metadata': metadata,
                'dense_similarity': float(similarity),
                'dense_score': dense_score,
                'sparse_score': sparse_score,
                'boost_score': boost_score,
                'hybrid_score': hybrid_score,
                'vehicle_info': metadata.get('vehicle_info', {}),
                'content_type': metadata.get('content_type', ''),
                'main_topic': metadata.get('main_topic', ''),
                'category_path': ' > '.join(category_levels),
                'processed_query': processed_query,
            })

        results.sort(key=lambda x: x['hybrid_score'], reverse=True)
        return results[:top_k]
# Streamlit app entry point
def main() -> None:
    """Render the whole page: sidebar settings, search form and results.

    Flow, in rendering order:
      1. Title and sidebar controls (result count, dense-score weight,
         target system).
      2. On the first run of a session, create the searcher, store it in
         ``st.session_state`` and build the sample collection.
      3. If the searcher has collections, show the collection picker and the
         query form; on submit, run the hybrid search and render summary
         metrics plus one card per result.
      4. Otherwise show instructions for providing collection files.
      5. Show the usage guide while collections exist and the query is empty.
    """
    # Title
    st.markdown('<h1 class="main-header">πŸ”§ ν•˜μ΄λΈŒλ¦¬λ“œ μ°¨λŸ‰ μ •λΉ„ 검색 μ‹œμŠ€ν…œ</h1>', unsafe_allow_html=True)
    # Sidebar
    with st.sidebar:
        st.header("βš™οΈ μ„€μ •")
        # Search parameters
        st.subheader("검색 μ„€μ •")
        top_k = st.slider("결과 개수", min_value=1, max_value=10, value=5)
        alpha = st.slider("벑터 검색 κ°€μ€‘μΉ˜", min_value=0.0, max_value=1.0, value=0.7, step=0.1)
        st.info(f"벑터 검색: {alpha:.1f}, ν‚€μ›Œλ“œ 검색: {1-alpha:.1f}")
        # System selection. The value is only passed to the searcher's
        # constructor below, i.e. when the searcher is first created.
        st.subheader("λŒ€μƒ μ‹œμŠ€ν…œ")
        target_system = st.selectbox(
            "μ‹œμŠ€ν…œ 선택",
            ["μˆ˜λ™λ³€μ†κΈ°", "μ—”μ§„", "브레이크"],
            index=0
        )
    # Main area
    # Searcher initialisation: runs only while no searcher is stored in the
    # session state.
    if 'searcher' not in st.session_state:
        with st.spinner('검색 μ‹œμŠ€ν…œ μ΄ˆκΈ°ν™” 쀑...'):
            try:
                st.session_state.searcher = HybridMultiCollectionSearcher(target_system=target_system)
                # Start with the built-in sample data as a smoke test.
                st.info("πŸ§ͺ μƒ˜ν”Œ λ°μ΄ν„°λ‘œ ν…ŒμŠ€νŠΈ 쀑...")
                success = st.session_state.searcher.create_sample_collection("ν…ŒμŠ€νŠΈ")
                if success:
                    st.success("βœ… μƒ˜ν”Œ 검색 μ‹œμŠ€ν…œμ΄ μ€€λΉ„λ˜μ—ˆμŠ΅λ‹ˆλ‹€!")
                    st.info("πŸ’‘ μ‹€μ œ μ»¬λ ‰μ…˜μ„ μ‚¬μš©ν•˜λ €λ©΄ `saved_collections` 폴더λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”.")
                else:
                    # The searcher stays in the session state without any
                    # collection, so later reruns skip this block and land in
                    # the "no collections" branch further down.
                    st.error("❌ μ‹œμŠ€ν…œ μ΄ˆκΈ°ν™”μ— μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")
            except Exception as e:
                st.error(f"❌ μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}")
                st.info("πŸ”§ 문제λ₯Ό ν•΄κ²°ν•˜λŠ” μ€‘μž…λ‹ˆλ‹€...")
    # Continue only when a searcher exists.
    if 'searcher' in st.session_state:
        available_collections = st.session_state.searcher.list_collections()
        # Show the search interface only when there is something to search.
        if available_collections:
            # Collection selection
            st.subheader("πŸ“š 검색 λŒ€μƒ μ»¬λ ‰μ…˜")
            selected_collection = st.selectbox(
                "μ»¬λ ‰μ…˜ 선택",
                available_collections,
                help="검색할 μ»¬λ ‰μ…˜μ„ μ„ νƒν•˜μ„Έμš”"
            )
            # Search interface
            # NOTE(review): the opening and closing <div> are emitted by
            # separate st.markdown calls; whether they actually wrap the
            # widgets created in between depends on how Streamlit renders
            # each element -- confirm in the browser.
            with st.container():
                st.markdown('<div class="search-container">', unsafe_allow_html=True)
                # Query input
                query = st.text_input(
                    "πŸ” μ§ˆλ¬Έμ„ μž…λ ₯ν•˜μ„Έμš”",
                    placeholder="예: μˆ˜λ™λ³€μ†κΈ° νƒˆκ±°λŠ” μ–΄λ–»κ²Œ ν•˜λ‚˜μš”?",
                    help="μ°¨λŸ‰ 정비에 κ΄€ν•œ μ§ˆλ¬Έμ„ 자유둭게 μž…λ ₯ν•˜μ„Έμš”."
                )
                # Search button, placed in the middle of three columns.
                col1, col2, col3 = st.columns([1, 2, 1])
                with col2:
                    search_button = st.button("πŸ” κ²€μƒ‰ν•˜κΈ°", type="primary", use_container_width=True)
                st.markdown('</div>', unsafe_allow_html=True)
            # Run the search
            if search_button and query:
                with st.spinner('검색 쀑...'):
                    results = st.session_state.searcher.search_collection(
                        selected_collection,
                        query,
                        top_k=top_k,
                        alpha=alpha
                    )
                if results:
                    st.success(f"βœ… {len(results)}개의 검색 κ²°κ³Όλ₯Ό μ°Ύμ•˜μŠ΅λ‹ˆλ‹€.")
                    # Search statistics: count, mean score, best score and
                    # collection name.
                    col1, col2, col3, col4 = st.columns(4)
                    with col1:
                        st.markdown('<div class="metric-card"><strong>검색 κ²°κ³Ό</strong><br>' + f'{len(results)}개</div>', unsafe_allow_html=True)
                    with col2:
                        avg_score = np.mean([r['hybrid_score'] for r in results])
                        st.markdown('<div class="metric-card"><strong>평균 점수</strong><br>' + f'{avg_score:.3f}</div>', unsafe_allow_html=True)
                    with col3:
                        max_score = max([r['hybrid_score'] for r in results])
                        st.markdown('<div class="metric-card"><strong>졜고 점수</strong><br>' + f'{max_score:.3f}</div>', unsafe_allow_html=True)
                    with col4:
                        st.markdown('<div class="metric-card"><strong>μ»¬λ ‰μ…˜</strong><br>' + f'{selected_collection}</div>', unsafe_allow_html=True)
                    st.markdown("---")
                    # Result list, numbered from 1.
                    for i, result in enumerate(results, 1):
                        st.markdown('<div class="result-card">', unsafe_allow_html=True)
                        # Header: topic on the left, hybrid score on the right.
                        col1, col2 = st.columns([3, 1])
                        with col1:
                            st.markdown(f"### πŸ“„ κ²°κ³Ό {i}: {result['main_topic']}")
                        with col2:
                            st.markdown(f'<span class="score-badge">점수: {result["hybrid_score"]:.3f}</span>', unsafe_allow_html=True)
                        # Metadata: content type and category path on the
                        # left, vehicle details on the right.
                        col1, col2 = st.columns(2)
                        with col1:
                            st.markdown(f'<span class="category-badge">{result["content_type"]}</span>', unsafe_allow_html=True)
                            st.markdown(f"**경둜:** {result['category_path']}")
                        with col2:
                            if result['vehicle_info']:
                                vehicle = result['vehicle_info']
                                st.markdown(f"**μ°¨λŸ‰:** {vehicle.get('model', 'N/A')}")
                                st.markdown(f"**μ‹œμŠ€ν…œ:** {vehicle.get('system', 'N/A')}")
                        # Content
                        st.markdown('<div class="content-text">', unsafe_allow_html=True)
                        st.markdown(f"**πŸ“‹ λ‚΄μš©:**\n\n{result['content']}")
                        st.markdown('</div>', unsafe_allow_html=True)
                        # Detailed scores (collapsible)
                        with st.expander("πŸ” 상세 점수 보기"):
                            score_col1, score_col2, score_col3 = st.columns(3)
                            with score_col1:
                                st.metric("벑터 점수", f"{result['dense_score']:.3f}")
                            with score_col2:
                                st.metric("ν‚€μ›Œλ“œ 점수", f"{result['sparse_score']:.3f}")
                            with score_col3:
                                st.metric("λΆ€μŠ€νŒ… 점수", f"{result['boost_score']:.3f}")
                            st.markdown(f"**처리된 쿼리:** `{result['processed_query']}`")
                            st.markdown(f"**청크 ID:** `{result['chunk_id']}`")
                        st.markdown('</div>', unsafe_allow_html=True)
                        st.markdown("---")
                else:
                    st.warning("πŸ€” 검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€. λ‹€λ₯Έ ν‚€μ›Œλ“œλ‘œ κ²€μƒ‰ν•΄λ³΄μ„Έμš”.")
            elif search_button and not query:
                st.warning("⚠️ 검색어λ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”.")
        else:
            # No collections available: explain how to provide them. The
            # text of this literal is kept flush-left so its content stays
            # exactly as written.
            st.warning("⚠️ λ‘œλ“œλœ μ»¬λ ‰μ…˜μ΄ μ—†μŠ΅λ‹ˆλ‹€.")
            st.markdown("""
### πŸ“ μ»¬λ ‰μ…˜ 파일 μ—…λ‘œλ“œ 방법
1. **λ‘œμ»¬μ—μ„œ μ»¬λ ‰μ…˜ 생성**:
```python
# 원본 μ½”λ“œ μ‚¬μš©
searcher = HybridMultiCollectionSearcher()
searcher.add_collection("μˆ˜λ™λ³€μ†κΈ°", metadata_dir, chunks_dir)
searcher.save_collections("./saved_collections")
```
2. **μƒμ„±λœ νŒŒμΌλ“€μ„ ν—ˆκΉ…νŽ˜μ΄μŠ€ Space에 μ—…λ‘œλ“œ**:
- `saved_collections/` 폴더 전체λ₯Ό μ—…λ‘œλ“œ
- 각 μ»¬λ ‰μ…˜λ³„λ‘œ `.pkl`, `.index` νŒŒμΌλ“€μ΄ 포함됨
3. **μ•± μž¬μ‹œμž‘** ν›„ 검색 κ°€λŠ₯
""")
    # Usage guide (only while collections exist). `query` is bound only
    # inside the "collections available" branch above; the two operands
    # before it are true exactly when that branch ran, so short-circuiting
    # keeps `query` from being read while unbound.
    if 'searcher' in st.session_state and st.session_state.searcher.list_collections() and not query:
        st.markdown("### πŸ’‘ μ‚¬μš© κ°€μ΄λ“œ")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("""
**πŸ”§ μ •λΉ„ μž‘μ—… 질문:**
- "μˆ˜λ™λ³€μ†κΈ° νƒˆκ±°λŠ” μ–΄λ–»κ²Œ ν•˜λ‚˜μš”?"
- "클러치 점검 방법을 μ•Œλ €μ£Όμ„Έμš”"
- "변속기 였일 κ΅ν™˜ μ ˆμ°¨λŠ”?"
""")
        with col2:
            st.markdown("""
**βš™οΈ λΆ€ν’ˆ 정보 질문:**
- "브레이크 νŒ¨λ“œ 사양은?"
- "μ—”μ§„ 였일 μš©λŸ‰μ€ μ–Όλ§ˆμΈκ°€μš”?"
- "타이어 곡기압 κΈ°μ€€μΉ˜λŠ”?"
""")
# Render the app only when this module is executed as the main script.
if __name__ == "__main__":
    main()