Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import os | |
| import json | |
| import re | |
| import numpy as np | |
| from typing import List, Dict, Tuple, Optional | |
| from pathlib import Path | |
| import logging | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import json | |
| from rank_bm25 import BM25Okapi | |
# Basic logging setup: INFO level on the root logger, module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Streamlit page configuration (must run before any other Streamlit call).
_PAGE_CONFIG = {
    "page_title": "νμ΄λΈλ¦¬λ μ°¨λ μ λΉ κ²μ μμ€ν ",
    "page_icon": "π§",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Page-wide CSS: header, search box, result cards, badges and metric cards.
_PAGE_CSS = """
<style>
.main-header {
font-size: 2.5rem;
color: #1f4e79;
text-align: center;
margin-bottom: 2rem;
font-weight: bold;
}
.search-container {
background-color: #f8f9fa;
padding: 2rem;
border-radius: 10px;
margin-bottom: 2rem;
border-left: 5px solid #1f4e79;
}
.result-card {
background-color: white;
padding: 1.5rem;
border-radius: 8px;
margin-bottom: 1rem;
border: 1px solid #dee2e6;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.score-badge {
background-color: #e3f2fd;
color: #1565c0;
padding: 0.25rem 0.75rem;
border-radius: 15px;
font-size: 0.8rem;
font-weight: bold;
}
.category-badge {
background-color: #f3e5f5;
color: #7b1fa2;
padding: 0.25rem 0.75rem;
border-radius: 15px;
font-size: 0.8rem;
margin-right: 0.5rem;
}
.content-text {
background-color: #f8f9fa;
padding: 1rem;
border-radius: 5px;
border-left: 3px solid #28a745;
margin-top: 1rem;
line-height: 1.6;
}
.metric-card {
background-color: #e8f5e8;
padding: 1rem;
border-radius: 5px;
text-align: center;
margin: 0.5rem;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Minimal parts vocabulary (stands in for the real vocab.py).
PARTS = [
    "μλλ³μκΈ°",
    "ν΄λ¬μΉ",
    "λΈλ μ΄ν¬",
    "μμ§",
    "νμ΄μ΄",
    "λ°°ν°λ¦¬",
    "μ€μΌ",
    "νν°",
    "벨νΈ",
    "νΈμ€",
    "νν",
    "μΌμ",
    "νΈλμ€λ―Έμ ",
    "λμ€ν¬",
    "ν¨λ",
    "μ",
    "λ‘ν°",
    "μΊλ¦¬νΌ",
    "λ§μ€ν°μ€λ¦°λ",
]

# Minimal system -> specific-parts mapping (stands in for the real parts_config.py).
SYSTEM_PARTS_MAP = {
    "μλλ³μκΈ°": [
        "ν΄λ¬μΉ",
        "λ³μκΈ°",
        "λλΌμ΄λΈμ€ννΈ",
        "λνΌλ μ ",
    ],
    "μμ§": [
        "νΌμ€ν€",
        "μ€λ¦°λ",
        "ν¬λν¬μ€ννΈ",
        "μΊ μ€ννΈ",
    ],
    "λΈλ μ΄ν¬": [
        "λΈλ μ΄ν¬ν¨λ",
        "λΈλ μ΄ν¬λμ€ν¬",
        "μΊλ¦¬νΌ",
        "λ§μ€ν°μ€λ¦°λ",
    ],
}
def get_specific_parts_for_system(system_name: str) -> list:
    """Return the specific parts registered for ``system_name``.

    Looks the system up in ``SYSTEM_PARTS_MAP`` and returns an empty list for
    an unknown system. A fresh list is returned each time so that callers can
    mutate the result without altering the shared module-level mapping.
    """
    return list(SYSTEM_PARTS_MAP.get(system_name, ()))
def get_all_specific_parts() -> list:
    """Return every specific part in ``SYSTEM_PARTS_MAP`` exactly once.

    Parts are de-duplicated in first-seen order. ``dict.fromkeys`` is used
    instead of ``set`` so the result order is the same on every run (set
    iteration order for strings changes with hash randomization).
    """
    return list(dict.fromkeys(
        part for parts in SYSTEM_PARTS_MAP.values() for part in parts
    ))
class SimpleMecab:
    """Lightweight stand-in for a MeCab morphological analyzer."""

    def pos(self, text):
        """Tag every whitespace-separated token longer than one character as 'NN'.

        Returns a list of ``(token, 'NN')`` tuples; single-character tokens
        are dropped.
        """
        tagged = []
        for token in text.split():
            if len(token) <= 1:
                continue
            tagged.append((token, 'NN'))
        return tagged
class HybridMultiCollectionSearcher:
    """Hybrid multi-collection searcher (dense vector search + BM25 keywords).

    Each named collection pairs a FAISS inner-product index with a BM25 index
    built over the same search texts. Results are ranked by a weighted blend
    of the two normalized scores plus a small rule-based boost.
    """

    def __init__(self, model_name: str = "upskyy/bge-m3-korean", target_system: Optional[str] = None):
        """Set up empty collection stores; the embedding model is loaded lazily.

        Args:
            model_name: SentenceTransformer model identifier.
            target_system: Optional system name; stored on the instance only.
        """
        self.model = None  # loaded on first use
        self.collections = {}
        self.bm25_indexes = {}
        self.target_system = target_system
        self.mecab = SimpleMecab()  # simple whitespace tokenizer stand-in
        self.model_name = model_name

    def load_model(self):
        """Load the sentence-embedding model.

        Returns:
            The SentenceTransformer instance, or ``None`` if loading failed
            (the error is reported through ``st.error``).
        """
        try:
            return SentenceTransformer(self.model_name)
        except Exception as e:
            st.error(f"λͺ¨λΈ λ‘λ μ€ν¨: {e}")
            return None

    def _extract_nouns_and_verbs(self, text: str) -> str:
        """Return a space-joined string of the meaningful tokens in ``text``.

        Known part names are first padded with spaces so they become separate
        tokens, then the tokenizer output is filtered to tokens longer than
        one character. On failure the (possibly padded) text is returned.
        """
        try:
            # Isolate known part names so the tokenizer splits them out.
            for part in PARTS:
                text = text.replace(part, f" {part} ")
            morphs = self.mecab.pos(text)
            return ' '.join(
                word for word, _tag in morphs
                if len(word) > 1 and not word.isspace()
            )
        except Exception as e:
            # Best-effort: fall back to the raw text, but leave a trace.
            logger.warning("Token extraction failed, using raw text: %s", e)
            return text

    def _normalize_text_for_matching(self, text: str) -> str:
        """Lower-case ``text`` and strip all period characters."""
        return text.lower().replace('.', '')

    def _normalize_scores(self, scores: np.ndarray) -> np.ndarray:
        """Min-max normalize ``scores`` into the 0-1 range.

        Empty input or input whose values are all equal yields an array of
        0.5 with the same shape.
        """
        scores = np.array(scores)
        if scores.size == 0:
            return np.ones_like(scores) * 0.5
        low, high = scores.min(), scores.max()
        if high == low:
            return np.ones_like(scores) * 0.5
        return (scores - low) / (high - low)

    def _calculate_boost_score(self, original_query: str, processed_query: str, metadata: Dict, content: str) -> float:
        """Compute a simple rule-based boost for one candidate.

        Adds 0.5 for each action keyword that appears both in the query and in
        the metadata ``content_type``, and 0.3 when any query word is a
        substring of the metadata ``vehicle_info['system']``. Missing or
        ``None`` metadata fields are treated as empty.
        """
        boost_score = 0.0
        query_lower = original_query.lower()

        # Content-type match on the three action keywords.
        content_type = metadata.get('content_type') or ''
        for keyword in ('νκ±°', 'μ₯μ°©', 'μ κ²'):
            if keyword in query_lower and keyword in content_type:
                boost_score += 0.5

        # System match.
        vehicle_info = metadata.get('vehicle_info') or {}
        system = vehicle_info.get('system') or ''
        if system:
            system_lower = system.lower()
            if any(word in system_lower for word in query_lower.split()):
                boost_score += 0.3
        return boost_score

    def create_sample_collection(self, collection_name: str):
        """Build an in-memory collection from three built-in sample documents.

        Returns:
            ``True`` on success, ``False`` if the model could not be loaded or
            any step failed (the failure is logged).
        """
        try:
            if self.model is None:
                self.model = self.load_model()
            if self.model is None:
                return False

            # Sample data.
            sample_data = [
                {
                    'chunk_id': 'sample_001',
                    'content': 'μλλ³μκΈ° νκ±° μμλ λ¨Όμ μμ§μ μ μ§νκ³ λ³μκΈ° μ€μΌμ λ°°μΆν©λλ€. ν΄λ¬μΉλ₯Ό λΆλ¦¬ν ν λ³μκΈ°λ₯Ό νκ±°ν©λλ€.',
                    'metadata': {
                        'chunk_id': 'sample_001',
                        'content_type': 'νκ±°λ°©λ²',
                        'main_topic': 'μλλ³μκΈ° νκ±°',
                        'vehicle_info': {'system': 'μλλ³μκΈ°', 'model': 'μμ΄λ‘μν°'},
                        'category_levels': ['λ³μκΈ°', 'μλλ³μκΈ°', 'νκ±°λ°©λ²'],
                        'extracted_components': ['λ³μκΈ°', 'ν΄λ¬μΉ']
                    }
                },
                {
                    'chunk_id': 'sample_002',
                    'content': 'μλλ³μκΈ° μ₯μ°©μ νκ±°μ μμμΌλ‘ μ§νν©λλ€. λ³μκΈ°λ₯Ό μ νν μμΉμ κ³ μ νκ³ ν΄λ¬μΉλ₯Ό μ°κ²°ν©λλ€.',
                    'metadata': {
                        'chunk_id': 'sample_002',
                        'content_type': 'μ₯μ°©λ°©λ²',
                        'main_topic': 'μλλ³μκΈ° μ₯μ°©',
                        'vehicle_info': {'system': 'μλλ³μκΈ°', 'model': 'μμ΄λ‘μν°'},
                        'category_levels': ['λ³μκΈ°', 'μλλ³μκΈ°', 'μ₯μ°©λ°©λ²'],
                        'extracted_components': ['λ³μκΈ°', 'ν΄λ¬μΉ']
                    }
                },
                {
                    'chunk_id': 'sample_003',
                    'content': 'λ³μκΈ° μ€μΌ μ κ² μ μ€μΌ λ 벨과 μ€μΌ μνλ₯Ό νμΈν©λλ€. κ·μ λμ 2.5Lμ΄λ©° μ€μΌ μ¨λλ 80Β°Cμμ μΈ‘μ ν©λλ€.',
                    'metadata': {
                        'chunk_id': 'sample_003',
                        'content_type': 'μ κ²μ μ°¨',
                        'main_topic': 'μ€μΌ μ κ²',
                        'vehicle_info': {'system': 'μλλ³μκΈ°', 'model': 'μμ΄λ‘μν°'},
                        'category_levels': ['λ³μκΈ°', 'μλλ³μκΈ°', 'μ κ²μ μ°¨'],
                        'extracted_components': ['μ€μΌ']
                    }
                }
            ]

            # Build the search text for every sample.
            search_texts = []
            metadata_list = []
            content_dict = {}
            for data in sample_data:
                metadata = data['metadata']
                content = data['content']
                search_components = [
                    metadata.get('content_type', ''),
                    metadata.get('main_topic', ''),
                    ' '.join(metadata.get('category_levels', [])),
                    content
                ]
                search_texts.append(self._extract_nouns_and_verbs(' '.join(search_components)))
                metadata_list.append(metadata)
                content_dict[metadata['chunk_id']] = content

            # Dense embeddings. normalize_L2 works in place, so make the array
            # float32 and contiguous before normalizing rather than after.
            embeddings = np.ascontiguousarray(
                self.model.encode(search_texts, show_progress_bar=False),
                dtype=np.float32,
            )
            faiss.normalize_L2(embeddings)
            faiss_index = faiss.IndexFlatIP(embeddings.shape[1])
            faiss_index.add(embeddings)

            # BM25 index over the same texts.
            bm25_index = BM25Okapi([text.split() for text in search_texts])

            # Register the collection.
            self.collections[collection_name] = {
                'metadata_list': metadata_list,
                'content_dict': content_dict,
                'search_texts': search_texts,
                'faiss_index': faiss_index
            }
            self.bm25_indexes[collection_name] = bm25_index
            return True
        except Exception as e:
            logger.error(f"μν 컬λ μ μμ± μ€ν¨: {e}")
            return False

    def load_collections(self, save_dir: str = "./saved_collections") -> bool:
        """Load saved hybrid collections (FAISS + BM25) without pickle.

        Every sub-directory of ``save_dir`` is treated as one collection and
        must contain ``faiss.index``, ``bm25_tokens.json`` and
        ``metadata.json``. Directories missing a file, or failing to load, are
        skipped with a log message.

        Returns:
            ``True`` if at least one collection was loaded, else ``False``.
        """
        save_dir = Path(save_dir)
        if not save_dir.exists():
            logger.warning(f"컬λ μ λλ ν λ¦¬κ° μ‘΄μ¬νμ§ μμ΅λλ€: {save_dir}")
            return False

        loaded_collections = []
        for collection_dir in save_dir.iterdir():
            if not collection_dir.is_dir():
                continue
            collection_name = collection_dir.name
            try:
                # 1. FAISS index.
                faiss_path = collection_dir / "faiss.index"
                if not faiss_path.exists():
                    logger.warning(f"FAISS μΈλ±μ€κ° μμ΅λλ€: {faiss_path}")
                    continue
                faiss_index = faiss.read_index(str(faiss_path))

                # 2. BM25 token data (JSON); the index is rebuilt from tokens.
                bm25_tokens_path = collection_dir / "bm25_tokens.json"
                if not bm25_tokens_path.exists():
                    logger.warning(f"BM25 ν ν° λ°μ΄ν°κ° μμ΅λλ€: {bm25_tokens_path}")
                    continue
                with open(bm25_tokens_path, 'r', encoding='utf-8') as f:
                    tokenized_docs = json.load(f)
                bm25_index = BM25Okapi(tokenized_docs)

                # 3. Metadata (JSON).
                metadata_path = collection_dir / "metadata.json"
                if not metadata_path.exists():
                    logger.warning(f"λ©νλ°μ΄ν°κ° μμ΅λλ€: {metadata_path}")
                    continue
                with open(metadata_path, 'r', encoding='utf-8') as f:
                    save_data = json.load(f)

                # Restore the collection.
                self.collections[collection_name] = {
                    'faiss_index': faiss_index,
                    **save_data
                }
                self.bm25_indexes[collection_name] = bm25_index
                loaded_collections.append(collection_name)
                logger.info(f"컬λ μ '{collection_name}' λ‘λ μλ£")
            except Exception as e:
                logger.error(f"컬λ μ '{collection_name}' λ‘λ μ€ν¨: {e}")
                continue

        if loaded_collections:
            logger.info(f"νμ΄λΈλ¦¬λ 컬λ μ λ‘λ μλ£: {loaded_collections}")
            return True
        logger.error("λ‘λλ 컬λ μ μ΄ μμ΅λλ€.")
        return False

    def list_collections(self) -> List[str]:
        """Return the names of all registered collections."""
        return list(self.collections.keys())

    def search_collection(self, collection_name: str, query: str, top_k: int = 5, alpha: float = 0.7) -> List[Dict]:
        """Run a hybrid search against one collection.

        Args:
            collection_name: Name of a registered collection.
            query: Free-text user query.
            top_k: Maximum number of results to return.
            alpha: Weight of the dense score; the sparse score gets
                ``1 - alpha``. The boost score is added unweighted.

        Returns:
            Up to ``top_k`` result dicts sorted by ``hybrid_score`` descending.
            An empty list is returned for an unknown or empty collection, a
            non-positive ``top_k``, or when the model cannot be loaded.
        """
        if collection_name not in self.collections:
            return []

        collection = self.collections[collection_name]
        faiss_index = collection['faiss_index']
        metadata_list = collection['metadata_list']
        content_dict = collection['content_dict']
        bm25_index = self.bm25_indexes[collection_name]

        # Over-fetch dense candidates so re-ranking has room to work.
        search_k = min(len(metadata_list), top_k * 3)
        if search_k <= 0:
            return []

        if self.model is None:
            self.model = self.load_model()
        if self.model is None:
            return []

        # Query processing.
        processed_query = self._extract_nouns_and_verbs(query)

        # Dense search (float32 + contiguous is required for in-place L2 norm).
        query_embedding = np.ascontiguousarray(
            self.model.encode([processed_query]), dtype=np.float32
        )
        faiss.normalize_L2(query_embedding)
        dense_similarities, dense_indices = faiss_index.search(query_embedding, search_k)

        # Drop FAISS padding rows (index -1) before normalizing, otherwise
        # their sentinel distances distort the min-max range.
        valid = dense_indices[0] != -1
        similarities = dense_similarities[0][valid]
        indices = dense_indices[0][valid]

        # Keyword search.
        sparse_scores = bm25_index.get_scores(processed_query.split())

        # Score normalization.
        dense_scores_norm = self._normalize_scores(similarities)
        sparse_scores_norm = self._normalize_scores(sparse_scores)

        # Build results.
        results = []
        for rank, (similarity, idx) in enumerate(zip(similarities, indices)):
            idx = int(idx)
            if idx >= len(metadata_list):
                continue  # index and metadata are out of sync; skip the row
            metadata = metadata_list[idx]
            chunk_id = metadata['chunk_id']
            content = content_dict.get(chunk_id, '')

            dense_score = float(dense_scores_norm[rank])
            sparse_score = float(sparse_scores_norm[idx]) if idx < len(sparse_scores_norm) else 0.0
            boost_score = self._calculate_boost_score(query, processed_query, metadata, content)
            hybrid_score = alpha * dense_score + (1 - alpha) * sparse_score + boost_score

            results.append({
                'chunk_id': chunk_id,
                'content': content,
                'metadata': metadata,
                'dense_similarity': float(similarity),
                'dense_score': dense_score,
                'sparse_score': sparse_score,
                'boost_score': boost_score,
                'hybrid_score': hybrid_score,
                'vehicle_info': metadata.get('vehicle_info', {}),
                'content_type': metadata.get('content_type', ''),
                'main_topic': metadata.get('main_topic', ''),
                'category_path': ' > '.join(metadata.get('category_levels', [])),
                'processed_query': processed_query,
            })

        results.sort(key=lambda item: item['hybrid_score'], reverse=True)
        return results[:top_k]
| # Streamlit μ± μμ | |
| def main(): | |
| # μ λͺ© | |
| st.markdown('<h1 class="main-header">π§ νμ΄λΈλ¦¬λ μ°¨λ μ λΉ κ²μ μμ€ν </h1>', unsafe_allow_html=True) | |
| # μ¬μ΄λλ° | |
| with st.sidebar: | |
| st.header("βοΈ μ€μ ") | |
| # κ²μ νλΌλ―Έν° | |
| st.subheader("κ²μ μ€μ ") | |
| top_k = st.slider("κ²°κ³Ό κ°μ", min_value=1, max_value=10, value=5) | |
| alpha = st.slider("λ²‘ν° κ²μ κ°μ€μΉ", min_value=0.0, max_value=1.0, value=0.7, step=0.1) | |
| st.info(f"λ²‘ν° κ²μ: {alpha:.1f}, ν€μλ κ²μ: {1-alpha:.1f}") | |
| # μμ€ν μ ν | |
| st.subheader("λμ μμ€ν ") | |
| target_system = st.selectbox( | |
| "μμ€ν μ ν", | |
| ["μλλ³μκΈ°", "μμ§", "λΈλ μ΄ν¬"], | |
| index=0 | |
| ) | |
| # λ©μΈ μμ | |
| # κ²μκΈ° μ΄κΈ°ν | |
| if 'searcher' not in st.session_state: | |
| with st.spinner('κ²μ μμ€ν μ΄κΈ°ν μ€...'): | |
| try: | |
| st.session_state.searcher = HybridMultiCollectionSearcher(target_system=target_system) | |
| # λ¨Όμ μν λ°μ΄ν°λ‘ ν μ€νΈ | |
| st.info("π§ͺ μν λ°μ΄ν°λ‘ ν μ€νΈ μ€...") | |
| success = st.session_state.searcher.create_sample_collection("ν μ€νΈ") | |
| if success: | |
| st.success("β μν κ²μ μμ€ν μ΄ μ€λΉλμμ΅λλ€!") | |
| st.info("π‘ μ€μ 컬λ μ μ μ¬μ©νλ €λ©΄ `saved_collections` ν΄λλ₯Ό μ λ‘λνμΈμ.") | |
| else: | |
| st.error("β μμ€ν μ΄κΈ°νμ μ€ν¨νμ΅λλ€.") | |
| except Exception as e: | |
| st.error(f"β μ΄κΈ°ν μ€λ₯: {str(e)}") | |
| st.info("π§ λ¬Έμ λ₯Ό ν΄κ²°νλ μ€μ λλ€...") | |
| # κ²μκΈ°κ° μλ κ²½μ°μλ§ μ§ν | |
| if 'searcher' in st.session_state: | |
| available_collections = st.session_state.searcher.list_collections() | |
| # 컬λ μ μ΄ μλ κ²½μ°μλ§ κ²μ μΈν°νμ΄μ€ νμ | |
| if available_collections: | |
| # 컬λ μ μ ν | |
| st.subheader("π κ²μ λμ 컬λ μ ") | |
| selected_collection = st.selectbox( | |
| "컬λ μ μ ν", | |
| available_collections, | |
| help="κ²μν 컬λ μ μ μ ννμΈμ" | |
| ) | |
| # κ²μ μΈν°νμ΄μ€ | |
| with st.container(): | |
| st.markdown('<div class="search-container">', unsafe_allow_html=True) | |
| # κ²μμ΄ μ λ ₯ | |
| query = st.text_input( | |
| "π μ§λ¬Έμ μ λ ₯νμΈμ", | |
| placeholder="μ: μλλ³μκΈ° νκ±°λ μ΄λ»κ² νλμ?", | |
| help="μ°¨λ μ λΉμ κ΄ν μ§λ¬Έμ μμ λ‘κ² μ λ ₯νμΈμ." | |
| ) | |
| # κ²μ λ²νΌ | |
| col1, col2, col3 = st.columns([1, 2, 1]) | |
| with col2: | |
| search_button = st.button("π κ²μνκΈ°", type="primary", use_container_width=True) | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| # κ²μ μ€ν | |
| if search_button and query: | |
| with st.spinner('κ²μ μ€...'): | |
| results = st.session_state.searcher.search_collection( | |
| selected_collection, | |
| query, | |
| top_k=top_k, | |
| alpha=alpha | |
| ) | |
| if results: | |
| st.success(f"β {len(results)}κ°μ κ²μ κ²°κ³Όλ₯Ό μ°Ύμμ΅λλ€.") | |
| # κ²μ ν΅κ³ | |
| col1, col2, col3, col4 = st.columns(4) | |
| with col1: | |
| st.markdown('<div class="metric-card"><strong>κ²μ κ²°κ³Ό</strong><br>' + f'{len(results)}κ°</div>', unsafe_allow_html=True) | |
| with col2: | |
| avg_score = np.mean([r['hybrid_score'] for r in results]) | |
| st.markdown('<div class="metric-card"><strong>νκ· μ μ</strong><br>' + f'{avg_score:.3f}</div>', unsafe_allow_html=True) | |
| with col3: | |
| max_score = max([r['hybrid_score'] for r in results]) | |
| st.markdown('<div class="metric-card"><strong>μ΅κ³ μ μ</strong><br>' + f'{max_score:.3f}</div>', unsafe_allow_html=True) | |
| with col4: | |
| st.markdown('<div class="metric-card"><strong>컬λ μ </strong><br>' + f'{selected_collection}</div>', unsafe_allow_html=True) | |
| st.markdown("---") | |
| # κ²μ κ²°κ³Ό νμ | |
| for i, result in enumerate(results, 1): | |
| st.markdown('<div class="result-card">', unsafe_allow_html=True) | |
| # ν€λ | |
| col1, col2 = st.columns([3, 1]) | |
| with col1: | |
| st.markdown(f"### π κ²°κ³Ό {i}: {result['main_topic']}") | |
| with col2: | |
| st.markdown(f'<span class="score-badge">μ μ: {result["hybrid_score"]:.3f}</span>', unsafe_allow_html=True) | |
| # λ©νλ°μ΄ν° | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown(f'<span class="category-badge">{result["content_type"]}</span>', unsafe_allow_html=True) | |
| st.markdown(f"**κ²½λ‘:** {result['category_path']}") | |
| with col2: | |
| if result['vehicle_info']: | |
| vehicle = result['vehicle_info'] | |
| st.markdown(f"**μ°¨λ:** {vehicle.get('model', 'N/A')}") | |
| st.markdown(f"**μμ€ν :** {vehicle.get('system', 'N/A')}") | |
| # λ΄μ© | |
| st.markdown('<div class="content-text">', unsafe_allow_html=True) | |
| st.markdown(f"**π λ΄μ©:**\n\n{result['content']}") | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| # μμΈ μ μ (νμ₯ κ°λ₯) | |
| with st.expander("π μμΈ μ μ 보기"): | |
| score_col1, score_col2, score_col3 = st.columns(3) | |
| with score_col1: | |
| st.metric("λ²‘ν° μ μ", f"{result['dense_score']:.3f}") | |
| with score_col2: | |
| st.metric("ν€μλ μ μ", f"{result['sparse_score']:.3f}") | |
| with score_col3: | |
| st.metric("λΆμ€ν μ μ", f"{result['boost_score']:.3f}") | |
| st.markdown(f"**μ²λ¦¬λ 쿼리:** `{result['processed_query']}`") | |
| st.markdown(f"**μ²ν¬ ID:** `{result['chunk_id']}`") | |
| st.markdown('</div>', unsafe_allow_html=True) | |
| st.markdown("---") | |
| else: | |
| st.warning("π€ κ²μ κ²°κ³Όκ° μμ΅λλ€. λ€λ₯Έ ν€μλλ‘ κ²μν΄λ³΄μΈμ.") | |
| elif search_button and not query: | |
| st.warning("β οΈ κ²μμ΄λ₯Ό μ λ ₯ν΄μ£ΌμΈμ.") | |
| else: | |
| # 컬λ μ μ΄ μλ κ²½μ° | |
| st.warning("β οΈ λ‘λλ 컬λ μ μ΄ μμ΅λλ€.") | |
| st.markdown(""" | |
| ### π 컬λ μ νμΌ μ λ‘λ λ°©λ² | |
| 1. **λ‘컬μμ 컬λ μ μμ±**: | |
| ```python | |
| # μλ³Έ μ½λ μ¬μ© | |
| searcher = HybridMultiCollectionSearcher() | |
| searcher.add_collection("μλλ³μκΈ°", metadata_dir, chunks_dir) | |
| searcher.save_collections("./saved_collections") | |
| ``` | |
| 2. **μμ±λ νμΌλ€μ νκΉ νμ΄μ€ Spaceμ μ λ‘λ**: | |
| - `saved_collections/` ν΄λ μ 체λ₯Ό μ λ‘λ | |
| - κ° μ»¬λ μ λ³λ‘ `.pkl`, `.index` νμΌλ€μ΄ ν¬ν¨λ¨ | |
| 3. **μ± μ¬μμ** ν κ²μ κ°λ₯ | |
| """) | |
| # μ¬μ© κ°μ΄λ (컬λ μ μ΄ μμ λλ§ νμ) | |
| if 'searcher' in st.session_state and st.session_state.searcher.list_collections() and not query: | |
| st.markdown("### π‘ μ¬μ© κ°μ΄λ") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown(""" | |
| **π§ μ λΉ μμ μ§λ¬Έ:** | |
| - "μλλ³μκΈ° νκ±°λ μ΄λ»κ² νλμ?" | |
| - "ν΄λ¬μΉ μ κ² λ°©λ²μ μλ €μ£ΌμΈμ" | |
| - "λ³μκΈ° μ€μΌ κ΅ν μ μ°¨λ?" | |
| """) | |
| with col2: | |
| st.markdown(""" | |
| **βοΈ λΆν μ 보 μ§λ¬Έ:** | |
| - "λΈλ μ΄ν¬ ν¨λ μ¬μμ?" | |
| - "μμ§ μ€μΌ μ©λμ μΌλ§μΈκ°μ?" | |
| - "νμ΄μ΄ 곡기μ κΈ°μ€μΉλ?" | |
| """) | |
# Script entry point: run the Streamlit app when executed directly.
if __name__ == "__main__":
    main()