import os
import pickle
import numpy as np
# import lancedb  # 🦁 Lite: Not used
# import torch
# from sentence_transformers import SentenceTransformer
# from kiwipiepy import Kiwi
from sqlalchemy import create_engine, text  # 🦁 SQL Support


class NyangRagEngine:
    """Lightweight RAG engine: SQL keyword search only.

    Vector search / embedding resources are intentionally disabled here
    (per the comments, heavy RAG runs on a separate Cloud Run service).
    This class only locates the ``petshop.db`` SQLite product database
    (optionally refreshing it from a Hugging Face dataset repo) and runs
    weighted keyword searches against it.
    """

    def __init__(self, base_dir=None):
        # 🦁 [Engine Init] Lightweight Mode (SQL Only)
        curr_file = os.path.abspath(__file__)
        core_dir = os.path.dirname(curr_file)
        chatbot_dir = os.path.dirname(core_dir)
        back_dir = os.path.dirname(chatbot_dir)

        # Imported lazily so module import does not require huggingface_hub
        # until an engine instance is actually constructed.
        from huggingface_hub import snapshot_download

        # 🦁 [Force Update] Delete the previously downloaded DB file so the
        # latest version is always re-fetched from the Hub.
        db_download_path = os.path.join(back_dir, "hf_db_storage")
        old_db_file = os.path.join(db_download_path, "petshop.db")
        if os.path.exists(old_db_file):
            try:
                os.remove(old_db_file)
                print(f"🧹 [Engine-Lite] Deleted old DB file: {old_db_file}")
            except Exception as e:
                # Best-effort cleanup: keeping a stale file is tolerable.
                print(f"⚠️ Failed to delete old DB: {e}")

        # 🦁 [Lite] Vector DB Download REMOVED (Handled by Google Cloud Run)
        # 🦁 [Lite] SQL Data Download (Keep this for Product Search)
        hf_db_repo = os.getenv("HF_DB_REPO")
        if hf_db_repo:
            try:
                print(f"🚀 [Engine-Lite] Downloading SQL Data from HF: {hf_db_repo}")
                os.makedirs(db_download_path, exist_ok=True)
                snapshot_download(
                    repo_id=hf_db_repo,
                    repo_type="dataset",
                    local_dir=db_download_path,
                    token=os.getenv("HF_TOKEN"),
                )
                print("✅ SQL Data Download Complete!")
            except Exception as e:
                # Non-fatal: a previously downloaded or local copy may exist.
                print(f"❌ SQL Data Download Failed: {e}")

        # 1. LanceDB & cache paths (not used in the backend).
        self.data_dir = None
        self.cache_path = None

        # 2. SQL database path strategy: first existing candidate wins.
        potential_paths = [
            os.path.join(db_download_path, "petshop.db"),      # HF download
            os.path.join(back_dir, "instance", "petshop.db"),  # default local
            os.path.join(back_dir, "petshop.db"),              # root local
        ]
        self.sql_path = None
        for p in potential_paths:
            if os.path.exists(p):
                self.sql_path = p
                print(f"🦁 [Engine-Lite] Selected SQL DB: {self.sql_path}")
                break
        if not self.sql_path:
            print("❌ Error: petshop.db NOT FOUND!")

        # Heavy-resource attributes kept for interface compatibility with the
        # full engine, but always None/empty in Lite mode.
        self.db = None
        self.embed_model = None
        self.kiwi = None
        self.coords_cache = None
        self.meta_cache = []
        self.id_to_idx = {}

    def load_resources(self):
        """Report readiness; no heavy resources are loaded in Lite mode.

        Returns:
            bool: always True — the engine is considered ready even when the
            SQL DB is missing (searches then return empty results).
        """
        print("\n" + "="*50)
        print("🚀 [Engine-Lite] Skipping Heavy Resources (RAG is on Cloud Run)")
        print(f" - SQL Path: {self.sql_path}")
        # 🦁 No heavy loading here!
        if self.sql_path and os.path.exists(self.sql_path):
            print("✅ SQL DB is ready.")
        else:
            print("⚠️ SQL DB not found.")
        print("🎉 [Engine-Lite] READY (Lightweight Mode)")
        print("="*50 + "\n")
        return True

    # 🦁 SQL keyword search (multi-field weighted scoring)
    def search_sql(self, keywords, limit=15):
        """Search the ``product`` table by keywords with weighted re-ranking.

        Strategy: cast a wide net with an OR-combined, parameterized LIKE
        query over title/content/category/sub_category (up to 500 candidate
        rows), then re-rank in Python with field-specific weights and a
        multi-keyword co-occurrence bonus.

        Args:
            keywords: list of search keywords; entries of length <= 1 are
                dropped (too noisy for LIKE matching).
            limit: maximum number of results to return (default 15).

        Returns:
            list[dict]: product dicts sorted by descending relevance score;
            empty list when the DB is missing, no usable keywords remain,
            or any error occurs.
        """
        if not keywords or not self.sql_path:
            print(f"⚠️ SQL Search Skipped: path={self.sql_path}")
            return []

        sql_engine = None
        try:
            sql_engine = create_engine(f"sqlite:///{self.sql_path}")
            results = []

            # 1. Fetch candidates (broad multi-field search).
            valid_kws = [k for k in keywords if len(k) > 1]
            if not valid_kws:
                return []
            fields = ["title", "content", "category", "sub_category"]
            conditions = []
            params = {}
            for i, kw in enumerate(valid_kws):
                key = f"kw{i}"
                field_queries = [f"({f} LIKE :{key})" for f in fields]
                conditions.append(f"({' OR '.join(field_queries)})")
                params[key] = f"%{kw}%"
            # OR across keywords to get broad candidates; precision comes
            # from the scoring pass below.
            where_clause = " OR ".join(conditions)
            query = text(
                "SELECT id, title, price, category, content, sub_category, "
                "pet_type, stock, review_count, img_url FROM product "
                f"WHERE {where_clause} LIMIT 500"
            )
            with sql_engine.connect() as conn:
                rows = conn.execute(query, params).fetchall()

            # 2. Advanced weighting & scoring.
            kws_lower = [kw.lower() for kw in valid_kws]  # hoisted out of the row loop
            targets = ["고양이", "강아지", "cat", "dog", "관상어", "소동물", "조류"]
            scored_rows = []
            for r in rows:
                score = 0
                title = (r[1] or "").lower()
                content = (r[4] or "").lower()
                category = (r[3] or "").lower()
                sub_cat = (r[5] or "").lower()
                for kw_lower in kws_lower:
                    # 🎯 Title match (highest weight + per-occurrence bonus)
                    if kw_lower in title:
                        score += 15.0 + (title.count(kw_lower) * 2.0)
                    # 📂 Category match (medium weight)
                    if kw_lower in category or kw_lower in sub_cat:
                        score += 8.0
                    # 📝 Content match (lower weight + per-occurrence bonus)
                    if kw_lower in content:
                        score += 3.0 + (content.count(kw_lower) * 0.5)
                    # 🐕 Pet-type keyword boost
                    if kw_lower in targets:
                        score += 10.0
                # Bonus for matching multiple different keywords (co-occurrence).
                haystack = title + content + category
                matches = sum(1 for kw_lower in kws_lower if kw_lower in haystack)
                if matches > 1:
                    score *= 1.2 ** (matches - 1)  # multi-keyword bonus
                scored_rows.append((r, score))

            # Sort by refined score, best first.
            scored_rows.sort(key=lambda x: x[1], reverse=True)

            # 3. Format top results.
            for r, score in scored_rows[:limit]:
                results.append({
                    "id": f"shop_{r[0]}",
                    "title": r[1],
                    "price": r[2],
                    "category": r[3],
                    "content": r[4],
                    "sub_category": r[5],
                    "pet_type": r[6],
                    "stock": r[7],
                    "review_count": r[8],
                    "img_url": r[9],
                    "link": f"/product/{r[0]}",
                    "score": score,  # the weighted relevance score
                    "source": "homepage",
                    "type": "product",
                })
            return results
        except Exception as e:
            print(f"❌ SQL Search Failed: {e}")
            return []
        finally:
            # Dispose the per-call engine so SQLite connection pools
            # are not leaked across repeated searches.
            if sql_engine is not None:
                sql_engine.dispose()

    # 🦁 Stub methods (no-ops in Lite mode; heavy RAG runs on Cloud Run)
    def extract_keywords(self, text):
        """No-op stub; keyword extraction is disabled in Lite mode."""
        return []

    def cluster_and_analyze(self, s1_data, request_id="unknown"):
        """No-op stub; clustering/analysis is disabled in Lite mode."""
        return {}, {}

    def search_hybrid(self, query, top_k=50, request_id="unknown"):
        """No-op stub; hybrid vector search is disabled in Lite mode."""
        return [], [], {}, []

    def search_refined(self, queries, request_id="unknown"):
        """No-op stub; refined multi-query search is disabled in Lite mode."""
        return []

    def rrf_merge(self, list1, list2_group, k=60):
        """No-op stub; reciprocal-rank-fusion merge is disabled in Lite mode."""
        return []