File size: 7,892 Bytes
41cc6f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
import os
import pickle
import numpy as np
# import lancedb # 🦁 Lite: Not used
# import torch
# from sentence_transformers import SentenceTransformer
# from kiwipiepy import Kiwi
from sqlalchemy import create_engine, text # 🦁 SQL Support

class NyangRagEngine:
    """Lightweight ("Lite") engine that only serves SQL product search.

    No vector search is performed here: the heavy attributes (``db``,
    ``embed_model``, ``kiwi``, ``coords_cache``) are kept as empty
    placeholders and the RAG-style methods are no-op stubs. According to the
    notes left in this file, RAG itself runs in a separate Cloud Run service.
    """

    # Columns matched with LIKE when collecting candidate rows.
    _SEARCH_FIELDS = ("title", "content", "category", "sub_category")

    # Species keywords that earn a flat score boost in `_score_row`.
    _PET_TARGETS = frozenset(
        {"고양이", "강아지", "cat", "dog", "관상어", "소동물", "조류"}
    )

    # Upper bound on candidate rows fetched before scoring in Python.
    _CANDIDATE_LIMIT = 500

    def __init__(self, base_dir=None):
        """Optionally refresh the SQL data from Hugging Face and locate petshop.db.

        Args:
            base_dir: Accepted for interface compatibility; it is not used.
                All paths are derived from the location of this file.

        Environment:
            HF_DB_REPO: dataset repo id to download; when unset, nothing is
                downloaded and nothing is deleted.
            HF_TOKEN: token passed to the download call.
        """
        # 🦁 [Engine Init] Lightweight Mode (SQL Only)
        # The base directory is three dirname() hops above this file.
        curr_file = os.path.abspath(__file__)
        core_dir = os.path.dirname(curr_file)
        chatbot_dir = os.path.dirname(core_dir)
        back_dir = os.path.dirname(chatbot_dir)

        db_download_path = os.path.join(back_dir, "hf_db_storage")

        # 🦁 [Lite] SQL data download (kept for product search).
        # Only refresh when a repo is configured; otherwise the previously
        # downloaded DB would be deleted with nothing to replace it.
        hf_db_repo = os.getenv("HF_DB_REPO")
        if hf_db_repo:
            self._download_sql_data(hf_db_repo, db_download_path)

        # 1. LanceDB & cache paths (not used in this backend)
        self.data_dir = None
        self.cache_path = None

        # 2. SQL database path strategy: first existing candidate wins.
        potential_paths = [
            os.path.join(db_download_path, "petshop.db"),      # HF download
            os.path.join(back_dir, "instance", "petshop.db"),  # default local
            os.path.join(back_dir, "petshop.db"),              # root local
        ]
        self.sql_path = next(
            (p for p in potential_paths if os.path.exists(p)), None
        )
        if self.sql_path:
            print(f"🦁 [Engine-Lite] Selected SQL DB: {self.sql_path}")
        else:
            print("❌ Error: petshop.db NOT FOUND!")

        # Placeholders for resources that Lite mode never loads.
        self.db = None
        self.embed_model = None
        self.kiwi = None
        self.coords_cache = None
        self.meta_cache = []
        self.id_to_idx = {}

    @staticmethod
    def _download_sql_data(hf_db_repo, db_download_path):
        """Force-refresh the dataset snapshot in ``db_download_path`` (best effort).

        Failures are reported with ``print`` and never raised, so engine
        construction continues with whatever DB file is available locally.
        """
        # Import lazily so the engine still constructs without
        # huggingface_hub, and before deleting anything so a missing package
        # cannot cost us the existing DB file.
        try:
            from huggingface_hub import snapshot_download
        except ImportError as e:
            print(f"❌ SQL Data Download Failed: {e}")
            return

        # 🦁 [Force Update] Remove the existing DB file so the latest
        # version is downloaded again.
        old_db_file = os.path.join(db_download_path, "petshop.db")
        if os.path.exists(old_db_file):
            try:
                os.remove(old_db_file)
                print(f"🧹 [Engine-Lite] Deleted old DB file: {old_db_file}")
            except OSError as e:
                print(f"⚠️ Failed to delete old DB: {e}")

        try:
            print(f"πŸš€ [Engine-Lite] Downloading SQL Data from HF: {hf_db_repo}")
            os.makedirs(db_download_path, exist_ok=True)
            snapshot_download(
                repo_id=hf_db_repo,
                repo_type="dataset",
                local_dir=db_download_path,
                token=os.getenv("HF_TOKEN"),
            )
            print("βœ… SQL Data Download Complete!")
        except Exception as e:
            # Deliberate best effort: fall back to any local DB file.
            print(f"❌ SQL Data Download Failed: {e}")

    def load_resources(self):
        """Report SQL DB availability; no heavy resources are loaded.

        Returns:
            Always ``True``.
        """
        print("\n" + "="*50)
        print("πŸš€ [Engine-Lite] Skipping Heavy Resources (RAG is on Cloud Run)")
        print(f"   - SQL Path: {self.sql_path}")

        # 🦁 No heavy loading here!
        if self.sql_path and os.path.exists(self.sql_path):
            print("βœ… SQL DB is ready.")
        else:
            print("⚠️ SQL DB not found.")

        print("πŸŽ‰ [Engine-Lite] READY (Lightweight Mode)")
        print("="*50 + "\n")
        return True

    @staticmethod
    def _normalize_keywords(keywords):
        """Return usable keywords: stripped strings longer than one character.

        A bare string is treated as a single keyword. Non-string entries are
        skipped, and case-insensitive duplicates are dropped (first one wins)
        so a repeated keyword is not scored twice.
        """
        if isinstance(keywords, str):
            keywords = [keywords]

        seen = set()
        cleaned = []
        for kw in keywords:
            if not isinstance(kw, str):
                continue
            kw = kw.strip()
            if len(kw) <= 1:
                continue
            folded = kw.lower()
            if folded in seen:
                continue
            seen.add(folded)
            cleaned.append(kw)
        return cleaned

    @classmethod
    def _score_row(cls, row, keywords_lower):
        """Compute the weighted relevance score of one candidate row.

        Args:
            row: Sequence in SELECT order; index 1 is title, 3 category,
                4 content and 5 sub_category.
            keywords_lower: Distinct, lower-cased keywords.

        Returns:
            The score as a float.
        """
        title = (row[1] or "").lower()
        category = (row[3] or "").lower()
        content = (row[4] or "").lower()
        sub_cat = (row[5] or "").lower()

        score = 0.0
        matched = 0
        for kw in keywords_lower:
            hit = False

            # Title match (highest weight), plus a per-occurrence bonus.
            if kw in title:
                score += 15.0 + (title.count(kw) * 2.0)
                hit = True

            # Category / sub-category match (medium weight).
            if kw in category or kw in sub_cat:
                score += 8.0
                hit = True

            # Content match (lower weight), plus a per-occurrence bonus.
            if kw in content:
                score += 3.0 + (content.count(kw) * 0.5)
                hit = True

            # Species keyword boost.
            # NOTE(review): this depends only on the keyword, not on the
            # row's pet_type column; behaviour preserved, intent to confirm.
            if kw in cls._PET_TARGETS:
                score += 10.0

            if hit:
                matched += 1

        # Bonus when several different keywords match this row. Each field is
        # tested on its own so a keyword cannot "match" across the boundary
        # between two concatenated fields.
        if matched > 1:
            score *= 1.2 ** (matched - 1)
        return score

    # 🦁 SQL keyword search (multi-field weighted scoring)
    def search_sql(self, keywords, limit=15):
        """Search the product table by keywords and return the best matches.

        Candidates are rows where any keyword occurs (LIKE) in any of the
        searched fields; they are then scored in Python and sorted by score.

        Args:
            keywords: Iterable of keyword strings, or a single string.
                Keywords of one character or less are ignored.
            limit: Maximum number of results returned.

        Returns:
            A list of product dicts ordered by descending ``score``. An empty
            list is returned when there is nothing to search or on any
            failure; errors are printed, not raised.
        """
        if not keywords or not self.sql_path:
            print(f"⚠️ SQL Search Skipped: path={self.sql_path}")
            return []

        sql_engine = None
        try:
            # Validate before touching the database.
            valid_kws = self._normalize_keywords(keywords)
            if not valid_kws:
                return []

            sql_engine = create_engine(f"sqlite:///{self.sql_path}")

            # 1. Fetch candidates: every keyword may match any field, and the
            #    per-keyword groups are OR-ed to keep the candidate set broad.
            conditions = []
            params = {}
            for i, kw in enumerate(valid_kws):
                key = f"kw{i}"
                per_field = " OR ".join(
                    f"({f} LIKE :{key})" for f in self._SEARCH_FIELDS
                )
                conditions.append(f"({per_field})")
                params[key] = f"%{kw}%"
            where_clause = " OR ".join(conditions)

            query = text(
                "SELECT id, title, price, category, content, sub_category, "
                "pet_type, stock, review_count, img_url "
                f"FROM product WHERE {where_clause} "
                f"LIMIT {self._CANDIDATE_LIMIT}"
            )
            with sql_engine.connect() as conn:
                rows = conn.execute(query, params).fetchall()

            # 2. Score and rank (list.sort is stable, so ties keep DB order).
            kws_lower = [kw.lower() for kw in valid_kws]
            scored_rows = [(r, self._score_row(r, kws_lower)) for r in rows]
            scored_rows.sort(key=lambda pair: pair[1], reverse=True)

            # 3. Format the top results.
            return [
                {
                    "id": f"shop_{r[0]}",
                    "title": r[1],
                    "price": r[2],
                    "category": r[3],
                    "content": r[4],
                    "sub_category": r[5],
                    "pet_type": r[6],
                    "stock": r[7],
                    "review_count": r[8],
                    "img_url": r[9],
                    "link": f"/product/{r[0]}",
                    "score": score,  # weighted score
                    "source": "homepage",
                    "type": "product",
                }
                for r, score in scored_rows[:limit]
            ]
        except Exception as e:
            # Search is best effort: report and return no results.
            print(f"❌ SQL Search Failed: {e}")
            return []
        finally:
            # An engine is created per call, so release its connection pool.
            if sql_engine is not None:
                sql_engine.dispose()

    # 🦁 Stub methods (no-op in Lite mode); signatures kept for callers.
    def extract_keywords(self, text):
        """No-op in Lite mode; always returns an empty list."""
        return []

    def cluster_and_analyze(self, s1_data, request_id="unknown"):
        """No-op in Lite mode; always returns two empty dicts."""
        return {}, {}

    def search_hybrid(self, query, top_k=50, request_id="unknown"):
        """No-op in Lite mode; always returns ``([], [], {}, [])``."""
        return [], [], {}, []

    def search_refined(self, queries, request_id="unknown"):
        """No-op in Lite mode; always returns an empty list."""
        return []

    def rrf_merge(self, list1, list2_group, k=60):
        """No-op in Lite mode; always returns an empty list."""
        return []