pradeep4321 committed on
Commit
0a8b71e
·
verified ·
1 Parent(s): 3509587

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +60 -202
src/app.py CHANGED
@@ -18,7 +18,29 @@ nltk.download('wordnet', quiet=True)
18
  from nltk.corpus import wordnet
19
 
20
  # ==============================
21
- # AUTHENTICATION (HF FIXED)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # ==============================
23
  def login():
24
  st.title("πŸ” Login Required")
@@ -27,23 +49,25 @@ def login():
27
  password = st.text_input("Password", type="password")
28
 
29
  if st.button("Login"):
 
 
 
 
 
 
 
 
30
 
31
- # βœ… HuggingFace secrets via environment
32
- hf_user = os.environ.get("USERNAME", "admin")
33
- hf_pass = os.environ.get("PASSWORD", "admin123")
34
 
35
- if username == hf_user and password == hf_pass:
36
- st.session_state["authenticated"] = True
37
- st.session_state["user"] = username
38
- st.session_state["login_time"] = pd.Timestamp.now()
 
39
 
40
- log_activity(username, "Login Success", "-", "-")
41
-
42
- st.success("βœ… Login successful")
43
- st.rerun()
44
- else:
45
- log_activity(username, "Login Failed", "-", "-")
46
- st.error("❌ Invalid credentials")
47
 
48
  # ==============================
49
  # SESSION CONTROL
@@ -61,7 +85,7 @@ if not st.session_state["authenticated"]:
61
  st.set_page_config(page_title="Multi Search Engine", layout="wide")
62
  st.title("πŸ” Advanced Multi-Search Product Engine")
63
 
64
- # Sidebar user info
65
  st.sidebar.success(f"πŸ‘€ User: {st.session_state['user']}")
66
  st.sidebar.info(f"πŸ•’ Login: {st.session_state['login_time']}")
67
 
@@ -80,53 +104,6 @@ def load_model():
80
 
81
  model = load_model()
82
 
83
- # ==============================
84
- # LOGGING FUNCTION (CSV SAVE)
85
- # ==============================
86
- LOG_FILE = "user_activity_log.csv"
87
-
88
- def log_activity(user, action, query, search_type):
89
- log_entry = {
90
- "User": user,
91
- "Action": action,
92
- "Query": query,
93
- "Search_Type": search_type,
94
- "Time": str(pd.Timestamp.now())
95
- }
96
-
97
- try:
98
- if os.path.exists(LOG_FILE):
99
- df_log = pd.read_csv(LOG_FILE)
100
- df_log = pd.concat([df_log, pd.DataFrame([log_entry])], ignore_index=True)
101
- else:
102
- df_log = pd.DataFrame([log_entry])
103
-
104
- df_log.to_csv(LOG_FILE, index=False)
105
-
106
- except Exception as e:
107
- st.warning(f"Logging failed: {e}")
108
-
109
- # ==============================
110
- # SEARCH INFO
111
- # ==============================
112
- search_info = {
113
- "Keyword": ("Exact match", "iphone"),
114
- "Regex": ("Pattern match", "^Samsung"),
115
- "Boolean": ("AND / OR logic", "nike AND shoes"),
116
- "Fuzzy": ("Spelling mistakes", "iphon"),
117
- "N-Gram": ("Partial word match", "iph"),
118
- "Prefix": ("Word starts with", "Sam"),
119
- "Suffix": ("Word ends with", "phone"),
120
- "TF-IDF": ("Keyword ranking", "wireless headphones"),
121
- "BM25": ("Advanced ranking", "gaming laptop"),
122
- "Semantic": ("Meaning search", "sports footwear"),
123
- "FAISS": ("Fast semantic search", "music device"),
124
- "Hybrid": ("TF-IDF + Semantic", "running shoes"),
125
- "Query Expansion": ("Auto synonyms", "speaker"),
126
- "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
127
- "Ensemble": ("Combine all scores", "smartphone")
128
- }
129
-
130
  # ==============================
131
  # LOAD DATA
132
  # ==============================
@@ -138,15 +115,7 @@ except Exception as e:
138
  st.stop()
139
 
140
  # ==============================
141
- # DATA PREVIEW
142
- # ==============================
143
- st.subheader("πŸ“„ Data Preview")
144
-
145
- rows_to_show = st.selectbox("Select rows to view", [10, 20, 50, 100])
146
- st.dataframe(df.head(rows_to_show))
147
-
148
- # ==============================
149
- # COMBINE TEXT
150
  # ==============================
151
  df["combined"] = (
152
  df["product_name"].astype(str) + " " +
@@ -157,15 +126,12 @@ df["combined"] = (
157
 
158
  products = df["combined"].tolist()
159
 
160
- # ==============================
161
- # PREPROCESS
162
- # ==============================
163
  @st.cache_resource
164
  def preprocess_data(products):
165
  tfidf = TfidfVectorizer()
166
  tfidf_matrix = tfidf.fit_transform(products)
167
 
168
- embeddings = model.encode(products, batch_size=64, show_progress_bar=False)
169
  faiss.normalize_L2(embeddings)
170
 
171
  index = faiss.IndexFlatIP(embeddings.shape[1])
@@ -179,167 +145,59 @@ def preprocess_data(products):
179
  tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
180
 
181
  # ==============================
182
- # SYNONYMS
183
- # ==============================
184
- def get_synonyms(word):
185
- synonyms = set()
186
- for syn in wordnet.synsets(word):
187
- for lemma in syn.lemmas():
188
- synonyms.add(lemma.name())
189
- return synonyms
190
-
191
- # ==============================
192
- # SEARCH FUNCTIONS (UNCHANGED)
193
  # ==============================
194
  def keyword_search(q):
195
  return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
196
 
197
- def regex_search(q):
198
- return [(i, 1) for i, p in enumerate(products) if re.search(q, p, re.IGNORECASE)]
199
-
200
- def boolean_search(q):
201
- if "AND" in q:
202
- terms = q.split("AND")
203
- return [(i, 1) for i, p in enumerate(products)
204
- if all(t.strip().lower() in p.lower() for t in terms)]
205
- elif "OR" in q:
206
- terms = q.split("OR")
207
- return [(i, 1) for i, p in enumerate(products)
208
- if any(t.strip().lower() in p.lower() for t in terms)]
209
- return []
210
-
211
- def fuzzy_search(q):
212
- scores = [(i, fuzz.ratio(q, p)) for i, p in enumerate(products)]
213
- return sorted(scores, key=lambda x: x[1], reverse=True)
214
-
215
- def ngram_search(q):
216
- return [(i, 1) for i, p in enumerate(products)
217
- if any(q.lower() in word for word in p.lower().split())]
218
-
219
- def prefix_search(q):
220
- return [(i, 1) for i, p in enumerate(products)
221
- if any(word.startswith(q.lower()) for word in p.lower().split())]
222
-
223
- def suffix_search(q):
224
- return [(i, 1) for i, p in enumerate(products)
225
- if any(word.endswith(q.lower()) for word in p.lower().split())]
226
-
227
- def tfidf_search(q):
228
- q_vec = tfidf.transform([q])
229
- scores = (tfidf_matrix @ q_vec.T).toarray().flatten()
230
- return list(enumerate(scores))
231
-
232
- def bm25_search(q):
233
- scores = bm25.get_scores(q.split())
234
- return list(enumerate(scores))
235
-
236
  def semantic_search(q):
237
- q_emb = model.encode([q], show_progress_bar=False)
238
  faiss.normalize_L2(q_emb)
239
  scores = np.dot(embeddings, q_emb.T).flatten()
240
  return list(enumerate(scores))
241
 
242
- def faiss_search(q):
243
- q_emb = model.encode([q], show_progress_bar=False)
244
- faiss.normalize_L2(q_emb)
245
- D, I = index.search(np.array(q_emb), 10)
246
- return [(i, float(D[0][idx])) for idx, i in enumerate(I[0])]
247
-
248
- def hybrid_search(q):
249
- tfidf_res = dict(tfidf_search(q))
250
- sem_res = dict(semantic_search(q))
251
- return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
252
-
253
- def query_expansion_search(q):
254
- expanded = q.split()
255
- for word in q.split():
256
- expanded += list(get_synonyms(word))
257
- return tfidf_search(" ".join(expanded))
258
-
259
- def weighted_hybrid(q):
260
- tfidf_res = dict(tfidf_search(q))
261
- sem_res = dict(semantic_search(q))
262
- bm25_res = dict(bm25_search(q))
263
-
264
- return [(i,
265
- 0.4 * tfidf_res.get(i, 0) +
266
- 0.4 * sem_res.get(i, 0) +
267
- 0.2 * bm25_res.get(i, 0))
268
- for i in range(len(products))]
269
-
270
- def ensemble_search(q):
271
- tfidf_res = np.array([s for _, s in tfidf_search(q)])
272
- sem_res = np.array([s for _, s in semantic_search(q)])
273
- bm25_res = np.array([s for _, s in bm25_search(q)])
274
-
275
- combined = (
276
- tfidf_res / (np.max(tfidf_res) + 1e-6) +
277
- sem_res / (np.max(sem_res) + 1e-6) +
278
- bm25_res / (np.max(bm25_res) + 1e-6)
279
- )
280
-
281
- return list(enumerate(combined))
282
-
283
  # ==============================
284
  # UI
285
  # ==============================
286
- search_type = st.selectbox("πŸ”Ž Select Search Type", list(search_info.keys()))
287
- explanation, example = search_info[search_type]
288
-
289
- st.markdown(f"""
290
- ### πŸ” {search_type}
291
- - **Explanation:** {explanation}
292
- - **Example:** `{example}`
293
- """)
294
-
295
  query = st.text_input("Enter your search query")
296
  top_k = st.slider("Top Results", 5, 20, 10)
297
 
298
  # ==============================
299
- # SEARCH EXECUTION
300
  # ==============================
301
  if st.button("Search"):
302
  if not query:
303
  st.warning("Enter query")
304
  else:
305
- func_map = {
306
- "Keyword": keyword_search,
307
- "Regex": regex_search,
308
- "Boolean": boolean_search,
309
- "Fuzzy": fuzzy_search,
310
- "N-Gram": ngram_search,
311
- "Prefix": prefix_search,
312
- "Suffix": suffix_search,
313
- "TF-IDF": tfidf_search,
314
- "BM25": bm25_search,
315
- "Semantic": semantic_search,
316
- "FAISS": faiss_search,
317
- "Hybrid": hybrid_search,
318
- "Query Expansion": query_expansion_search,
319
- "Weighted Hybrid": weighted_hybrid,
320
- "Ensemble": ensemble_search
321
- }
322
-
323
- results = func_map[search_type](query)
324
  results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
325
 
326
  # ✅ LOG SEARCH
327
- log_activity(st.session_state["user"], "Search", query, search_type)
 
 
 
 
 
328
 
329
  indices = [i for i, _ in results]
330
  result_df = df.iloc[indices].copy()
331
  result_df["Score"] = [round(score, 4) for _, score in results]
332
 
333
- st.subheader("πŸ”Ž Results")
334
  st.dataframe(result_df)
335
 
336
  # ==============================
337
- # SHOW LOGS
338
  # ==============================
339
- st.sidebar.subheader("πŸ“Š Activity Log")
340
 
341
  if os.path.exists(LOG_FILE):
342
  log_df = pd.read_csv(LOG_FILE)
343
  st.sidebar.dataframe(log_df.tail(10))
344
  else:
345
- st.sidebar.write("No activity yet")
 
18
  from nltk.corpus import wordnet
19
 
20
  # ==============================
21
+ # LOG FUNCTION (FIXED POSITION)
22
+ # ==============================
23
+ LOG_FILE = "user_logs.csv"
24
+
25
+ def log_activity(user, action, query, search_type):
26
+ log_entry = {
27
+ "User": user,
28
+ "Action": action,
29
+ "Query": query,
30
+ "Search Type": search_type,
31
+ "Time": str(pd.Timestamp.now())
32
+ }
33
+
34
+ if os.path.exists(LOG_FILE):
35
+ df_log = pd.read_csv(LOG_FILE)
36
+ df_log = pd.concat([df_log, pd.DataFrame([log_entry])])
37
+ else:
38
+ df_log = pd.DataFrame([log_entry])
39
+
40
+ df_log.to_csv(LOG_FILE, index=False)
41
+
42
+ # ==============================
43
+ # AUTHENTICATION
44
  # ==============================
45
  def login():
46
  st.title("πŸ” Login Required")
 
49
  password = st.text_input("Password", type="password")
50
 
51
  if st.button("Login"):
52
+ try:
53
+ if (
54
+ username == st.secrets["USERNAME"] and
55
+ password == st.secrets["PASSWORD"]
56
+ ):
57
+ st.session_state["authenticated"] = True
58
+ st.session_state["user"] = username
59
+ st.session_state["login_time"] = pd.Timestamp.now()
60
 
61
+ log_activity(username, "Login Success", "-", "-")
 
 
62
 
63
+ st.success("βœ… Login successful")
64
+ st.rerun()
65
+ else:
66
+ log_activity(username, "Login Failed", "-", "-")
67
+ st.error("❌ Invalid credentials")
68
 
69
+ except Exception:
70
+ st.error("⚠️ Secrets not configured properly")
 
 
 
 
 
71
 
72
  # ==============================
73
  # SESSION CONTROL
 
85
  st.set_page_config(page_title="Multi Search Engine", layout="wide")
86
  st.title("πŸ” Advanced Multi-Search Product Engine")
87
 
88
+ # Sidebar info
89
  st.sidebar.success(f"πŸ‘€ User: {st.session_state['user']}")
90
  st.sidebar.info(f"πŸ•’ Login: {st.session_state['login_time']}")
91
 
 
104
 
105
  model = load_model()
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # ==============================
108
  # LOAD DATA
109
  # ==============================
 
115
  st.stop()
116
 
117
  # ==============================
118
+ # PREPROCESS
 
 
 
 
 
 
 
 
119
  # ==============================
120
  df["combined"] = (
121
  df["product_name"].astype(str) + " " +
 
126
 
127
  products = df["combined"].tolist()
128
 
 
 
 
129
  @st.cache_resource
130
  def preprocess_data(products):
131
  tfidf = TfidfVectorizer()
132
  tfidf_matrix = tfidf.fit_transform(products)
133
 
134
+ embeddings = model.encode(products, show_progress_bar=False)
135
  faiss.normalize_L2(embeddings)
136
 
137
  index = faiss.IndexFlatIP(embeddings.shape[1])
 
145
  tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
146
 
147
  # ==============================
148
+ # SEARCH FUNCTIONS
 
 
 
 
 
 
 
 
 
 
149
  # ==============================
150
  def keyword_search(q):
151
  return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  def semantic_search(q):
154
+ q_emb = model.encode([q])
155
  faiss.normalize_L2(q_emb)
156
  scores = np.dot(embeddings, q_emb.T).flatten()
157
  return list(enumerate(scores))
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  # ==============================
160
  # UI
161
  # ==============================
162
+ search_type = st.selectbox("πŸ”Ž Search Type", ["Keyword", "Semantic"])
 
 
 
 
 
 
 
 
163
  query = st.text_input("Enter your search query")
164
  top_k = st.slider("Top Results", 5, 20, 10)
165
 
166
  # ==============================
167
+ # SEARCH
168
  # ==============================
169
  if st.button("Search"):
170
  if not query:
171
  st.warning("Enter query")
172
  else:
173
+ if search_type == "Keyword":
174
+ results = keyword_search(query)
175
+ else:
176
+ results = semantic_search(query)
177
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
179
 
180
  # ✅ LOG SEARCH
181
+ log_activity(
182
+ st.session_state["user"],
183
+ "Search",
184
+ query,
185
+ search_type
186
+ )
187
 
188
  indices = [i for i, _ in results]
189
  result_df = df.iloc[indices].copy()
190
  result_df["Score"] = [round(score, 4) for _, score in results]
191
 
 
192
  st.dataframe(result_df)
193
 
194
  # ==============================
195
+ # VIEW LOGS
196
  # ==============================
197
+ st.sidebar.subheader("πŸ“Š User Logs")
198
 
199
  if os.path.exists(LOG_FILE):
200
  log_df = pd.read_csv(LOG_FILE)
201
  st.sidebar.dataframe(log_df.tail(10))
202
  else:
203
+ st.sidebar.write("No logs yet")