pradeep4321 committed on
Commit
fb3391a
·
verified ·
1 Parent(s): f5f3b3e

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +27 -53
src/app.py CHANGED
@@ -25,30 +25,27 @@ st.title("🔍 Advanced Multi-Search Product Engine")
25
  # ==============================
26
  # LOAD MODEL
27
  # ==============================
28
- if "model" not in st.session_state:
29
- with st.spinner("Loading AI model..."):
30
- st.session_state.model = SentenceTransformer(
31
- 'all-MiniLM-L6-v2',
32
- device='cpu'
33
- )
34
 
35
- model = st.session_state.model
36
 
37
  # ==============================
38
- # SEARCH INFO (UPDATED)
39
  # ==============================
40
  search_info = {
41
  "Keyword": ("Exact match", "iphone"),
42
  "Regex": ("Pattern match", "^Samsung"),
43
  "Boolean": ("AND / OR logic", "nike AND shoes"),
44
  "Fuzzy": ("Spelling mistakes", "iphon"),
45
- "N-Gram": ("Partial word", "iph"),
46
  "Prefix": ("Word starts with", "Sam"),
47
  "Suffix": ("Word ends with", "phone"),
48
  "TF-IDF": ("Keyword ranking", "wireless headphones"),
49
  "BM25": ("Advanced ranking", "gaming laptop"),
50
  "Semantic": ("Meaning search", "sports footwear"),
51
- "FAISS": ("Fast semantic", "music device"),
52
  "Hybrid": ("TF-IDF + Semantic", "running shoes"),
53
  "Query Expansion": ("Auto synonyms", "speaker"),
54
  "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
@@ -56,35 +53,21 @@ search_info = {
56
  }
57
 
58
  # ==============================
59
- # FILE LOAD (KEEP YOUR LOGIC)
60
  # ==============================
61
- #uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
62
-
63
- if uploaded_file:
64
- df = pd.read_csv("src/products_10k.csv")
65
- else:
66
- st.info("Using sample dataset")
67
- df = pd.DataFrame({
68
- "product_name": [
69
- "iPhone 14 Pro",
70
- "Samsung Galaxy S23",
71
- "Nike Running Shoes",
72
- "Dell Gaming Laptop",
73
- "Bluetooth Speaker"
74
- ],
75
- "category": ["Mobile", "Mobile", "Footwear", "Laptop", "Electronics"],
76
- "brand": ["Apple", "Samsung", "Nike", "Dell", "JBL"],
77
- "description": [
78
- "Latest smartphone",
79
- "Android flagship phone",
80
- "Comfort sports shoes",
81
- "High performance laptop",
82
- "Portable music device"
83
- ]
84
- })
85
 
86
  # ==============================
87
- # DATA PREVIEW CONTROL
88
  # ==============================
89
  st.subheader("📄 Data Preview")
90
 
@@ -106,7 +89,7 @@ products = df["combined"].tolist()
106
  # ==============================
107
  # PREPROCESS
108
  # ==============================
109
- @st.cache(allow_output_mutation=True)
110
  def preprocess_data(products):
111
  tfidf = TfidfVectorizer()
112
  tfidf_matrix = tfidf.fit_transform(products)
@@ -159,14 +142,13 @@ def fuzzy_search(q):
159
  return sorted(scores, key=lambda x: x[1], reverse=True)
160
 
161
  def ngram_search(q):
162
- return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
 
163
 
164
- # ✅ FIXED PREFIX (word-level)
165
  def prefix_search(q):
166
  return [(i, 1) for i, p in enumerate(products)
167
  if any(word.startswith(q.lower()) for word in p.lower().split())]
168
 
169
- # ✅ FIXED SUFFIX (word-level)
170
  def suffix_search(q):
171
  return [(i, 1) for i, p in enumerate(products)
172
  if any(word.endswith(q.lower()) for word in p.lower().split())]
@@ -197,14 +179,12 @@ def hybrid_search(q):
197
  sem_res = dict(semantic_search(q))
198
  return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
199
 
200
- # ✅ IMPROVED QUERY EXPANSION
201
  def query_expansion_search(q):
202
  expanded = q.split()
203
  for word in q.split():
204
  expanded += list(get_synonyms(word))
205
  return tfidf_search(" ".join(expanded))
206
 
207
- # ✅ IMPROVED WEIGHTED HYBRID
208
  def weighted_hybrid(q):
209
  tfidf_res = dict(tfidf_search(q))
210
  sem_res = dict(semantic_search(q))
@@ -216,15 +196,16 @@ def weighted_hybrid(q):
216
  0.2 * bm25_res.get(i, 0))
217
  for i in range(len(products))]
218
 
219
- # ✅ FIXED ENSEMBLE (NORMALIZED)
220
  def ensemble_search(q):
221
  tfidf_res = np.array([s for _, s in tfidf_search(q)])
222
  sem_res = np.array([s for _, s in semantic_search(q)])
223
  bm25_res = np.array([s for _, s in bm25_search(q)])
224
 
225
- combined = tfidf_res/np.max(tfidf_res+1e-6) + \
226
- sem_res/np.max(sem_res+1e-6) + \
227
- bm25_res/np.max(bm25_res+1e-6)
 
 
228
 
229
  return list(enumerate(combined))
230
 
@@ -241,11 +222,6 @@ st.markdown(f"""
241
  """)
242
 
243
  query = st.text_input("Enter your search query")
244
-
245
- if st.button("Try Example"):
246
- query = example
247
- st.success(f"Loaded: {query}")
248
-
249
  top_k = st.slider("Top Results", 5, 20, 10)
250
 
251
  # ==============================
@@ -274,8 +250,6 @@ if st.button("Search"):
274
  }
275
 
276
  results = func_map[search_type](query)
277
-
278
- # Sort results
279
  results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
280
 
281
  indices = [i for i, _ in results]
 
25
  # ==============================
26
  # LOAD MODEL
27
  # ==============================
28
+ @st.cache_resource
29
+ def load_model():
30
+ return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
 
 
 
31
 
32
+ model = load_model()
33
 
34
  # ==============================
35
+ # SEARCH INFO
36
  # ==============================
37
  search_info = {
38
  "Keyword": ("Exact match", "iphone"),
39
  "Regex": ("Pattern match", "^Samsung"),
40
  "Boolean": ("AND / OR logic", "nike AND shoes"),
41
  "Fuzzy": ("Spelling mistakes", "iphon"),
42
+ "N-Gram": ("Partial word match", "iph"),
43
  "Prefix": ("Word starts with", "Sam"),
44
  "Suffix": ("Word ends with", "phone"),
45
  "TF-IDF": ("Keyword ranking", "wireless headphones"),
46
  "BM25": ("Advanced ranking", "gaming laptop"),
47
  "Semantic": ("Meaning search", "sports footwear"),
48
+ "FAISS": ("Fast semantic search", "music device"),
49
  "Hybrid": ("TF-IDF + Semantic", "running shoes"),
50
  "Query Expansion": ("Auto synonyms", "speaker"),
51
  "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
 
53
  }
54
 
55
  # ==============================
56
+ # LOAD DATA
57
  # ==============================
58
+ try:
59
+ df = pd.read_csv("products_10k.csv")
60
+ st.success("✅ Data loaded successfully")
61
+ except Exception as e:
62
+ st.error(f"❌ Error loading file: {e}")
63
+ st.stop()
64
+
65
+ if df.empty:
66
+ st.error("Dataset is empty!")
67
+ st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # ==============================
70
+ # DATA PREVIEW
71
  # ==============================
72
  st.subheader("📄 Data Preview")
73
 
 
89
  # ==============================
90
  # PREPROCESS
91
  # ==============================
92
+ @st.cache_resource
93
  def preprocess_data(products):
94
  tfidf = TfidfVectorizer()
95
  tfidf_matrix = tfidf.fit_transform(products)
 
142
  return sorted(scores, key=lambda x: x[1], reverse=True)
143
 
144
  def ngram_search(q):
145
+ return [(i, 1) for i, p in enumerate(products)
146
+ if any(q.lower() in word for word in p.lower().split())]
147
 
 
148
  def prefix_search(q):
149
  return [(i, 1) for i, p in enumerate(products)
150
  if any(word.startswith(q.lower()) for word in p.lower().split())]
151
 
 
152
  def suffix_search(q):
153
  return [(i, 1) for i, p in enumerate(products)
154
  if any(word.endswith(q.lower()) for word in p.lower().split())]
 
179
  sem_res = dict(semantic_search(q))
180
  return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
181
 
 
182
  def query_expansion_search(q):
183
  expanded = q.split()
184
  for word in q.split():
185
  expanded += list(get_synonyms(word))
186
  return tfidf_search(" ".join(expanded))
187
 
 
188
  def weighted_hybrid(q):
189
  tfidf_res = dict(tfidf_search(q))
190
  sem_res = dict(semantic_search(q))
 
196
  0.2 * bm25_res.get(i, 0))
197
  for i in range(len(products))]
198
 
 
199
  def ensemble_search(q):
200
  tfidf_res = np.array([s for _, s in tfidf_search(q)])
201
  sem_res = np.array([s for _, s in semantic_search(q)])
202
  bm25_res = np.array([s for _, s in bm25_search(q)])
203
 
204
+ combined = (
205
+ tfidf_res / (np.max(tfidf_res) + 1e-6) +
206
+ sem_res / (np.max(sem_res) + 1e-6) +
207
+ bm25_res / (np.max(bm25_res) + 1e-6)
208
+ )
209
 
210
  return list(enumerate(combined))
211
 
 
222
  """)
223
 
224
  query = st.text_input("Enter your search query")
 
 
 
 
 
225
  top_k = st.slider("Top Results", 5, 20, 10)
226
 
227
  # ==============================
 
250
  }
251
 
252
  results = func_map[search_type](query)
 
 
253
  results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
254
 
255
  indices = [i for i, _ in results]