Spaces:

pradeep4321
/

sample_multi_search

Sleeping

App Files Files Community

pradeep4321 committed on Apr 1

Commit

0a8b71e

verified ·

1 Parent(s): 3509587

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +60 -202

src/app.py CHANGED Viewed

@@ -18,7 +18,29 @@ nltk.download('wordnet', quiet=True)
 from nltk.corpus import wordnet
 # ==============================
-# AUTHENTICATION (HF FIXED)
 # ==============================
 def login():
     st.title("🔐 Login Required")
@@ -27,23 +49,25 @@ def login():
     password = st.text_input("Password", type="password")
     if st.button("Login"):
-        # ✅ HuggingFace secrets via environment
-        hf_user = os.environ.get("USERNAME", "admin")
-        hf_pass = os.environ.get("PASSWORD", "admin123")
-        if username == hf_user and password == hf_pass:
-            st.session_state["authenticated"] = True
-            st.session_state["user"] = username
-            st.session_state["login_time"] = pd.Timestamp.now()
-            log_activity(username, "Login Success", "-", "-")
-            st.success("✅ Login successful")
-            st.rerun()
-        else:
-            log_activity(username, "Login Failed", "-", "-")
-            st.error("❌ Invalid credentials")
 # ==============================
 # SESSION CONTROL
@@ -61,7 +85,7 @@ if not st.session_state["authenticated"]:
 st.set_page_config(page_title="Multi Search Engine", layout="wide")
 st.title("🔍 Advanced Multi-Search Product Engine")
-# Sidebar user info
 st.sidebar.success(f"👤 User: {st.session_state['user']}")
 st.sidebar.info(f"🕒 Login: {st.session_state['login_time']}")
@@ -80,53 +104,6 @@ def load_model():
 model = load_model()
-# ==============================
-# LOGGING FUNCTION (CSV SAVE)
-# ==============================
-LOG_FILE = "user_activity_log.csv"
-def log_activity(user, action, query, search_type):
-    log_entry = {
-        "User": user,
-        "Action": action,
-        "Query": query,
-        "Search_Type": search_type,
-        "Time": str(pd.Timestamp.now())
-    }
-    try:
-        if os.path.exists(LOG_FILE):
-            df_log = pd.read_csv(LOG_FILE)
-            df_log = pd.concat([df_log, pd.DataFrame([log_entry])], ignore_index=True)
-        else:
-            df_log = pd.DataFrame([log_entry])
-        df_log.to_csv(LOG_FILE, index=False)
-    except Exception as e:
-        st.warning(f"Logging failed: {e}")
-# ==============================
-# SEARCH INFO
-# ==============================
-search_info = {
-    "Keyword": ("Exact match", "iphone"),
-    "Regex": ("Pattern match", "^Samsung"),
-    "Boolean": ("AND / OR logic", "nike AND shoes"),
-    "Fuzzy": ("Spelling mistakes", "iphon"),
-    "N-Gram": ("Partial word match", "iph"),
-    "Prefix": ("Word starts with", "Sam"),
-    "Suffix": ("Word ends with", "phone"),
-    "TF-IDF": ("Keyword ranking", "wireless headphones"),
-    "BM25": ("Advanced ranking", "gaming laptop"),
-    "Semantic": ("Meaning search", "sports footwear"),
-    "FAISS": ("Fast semantic search", "music device"),
-    "Hybrid": ("TF-IDF + Semantic", "running shoes"),
-    "Query Expansion": ("Auto synonyms", "speaker"),
-    "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
-    "Ensemble": ("Combine all scores", "smartphone")
-}
 # ==============================
 # LOAD DATA
 # ==============================
@@ -138,15 +115,7 @@ except Exception as e:
     st.stop()
 # ==============================
-# DATA PREVIEW
-# ==============================
-st.subheader("📄 Data Preview")
-rows_to_show = st.selectbox("Select rows to view", [10, 20, 50, 100])
-st.dataframe(df.head(rows_to_show))
-# ==============================
-# COMBINE TEXT
 # ==============================
 df["combined"] = (
     df["product_name"].astype(str) + " " +
@@ -157,15 +126,12 @@ df["combined"] = (
 products = df["combined"].tolist()
-# ==============================
-# PREPROCESS
-# ==============================
 @st.cache_resource
 def preprocess_data(products):
     tfidf = TfidfVectorizer()
     tfidf_matrix = tfidf.fit_transform(products)
-    embeddings = model.encode(products, batch_size=64, show_progress_bar=False)
     faiss.normalize_L2(embeddings)
     index = faiss.IndexFlatIP(embeddings.shape[1])
@@ -179,167 +145,59 @@ def preprocess_data(products):
 tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
 # ==============================
-# SYNONYMS
-# ==============================
-def get_synonyms(word):
-    synonyms = set()
-    for syn in wordnet.synsets(word):
-        for lemma in syn.lemmas():
-            synonyms.add(lemma.name())
-    return synonyms
-# ==============================
-# SEARCH FUNCTIONS (UNCHANGED)
 # ==============================
 def keyword_search(q):
     return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
-def regex_search(q):
-    return [(i, 1) for i, p in enumerate(products) if re.search(q, p, re.IGNORECASE)]
-def boolean_search(q):
-    if "AND" in q:
-        terms = q.split("AND")
-        return [(i, 1) for i, p in enumerate(products)
-                if all(t.strip().lower() in p.lower() for t in terms)]
-    elif "OR" in q:
-        terms = q.split("OR")
-        return [(i, 1) for i, p in enumerate(products)
-                if any(t.strip().lower() in p.lower() for t in terms)]
-    return []
-def fuzzy_search(q):
-    scores = [(i, fuzz.ratio(q, p)) for i, p in enumerate(products)]
-    return sorted(scores, key=lambda x: x[1], reverse=True)
-def ngram_search(q):
-    return [(i, 1) for i, p in enumerate(products)
-            if any(q.lower() in word for word in p.lower().split())]
-def prefix_search(q):
-    return [(i, 1) for i, p in enumerate(products)
-            if any(word.startswith(q.lower()) for word in p.lower().split())]
-def suffix_search(q):
-    return [(i, 1) for i, p in enumerate(products)
-            if any(word.endswith(q.lower()) for word in p.lower().split())]
-def tfidf_search(q):
-    q_vec = tfidf.transform([q])
-    scores = (tfidf_matrix @ q_vec.T).toarray().flatten()
-    return list(enumerate(scores))
-def bm25_search(q):
-    scores = bm25.get_scores(q.split())
-    return list(enumerate(scores))
 def semantic_search(q):
-    q_emb = model.encode([q], show_progress_bar=False)
     faiss.normalize_L2(q_emb)
     scores = np.dot(embeddings, q_emb.T).flatten()
     return list(enumerate(scores))
-def faiss_search(q):
-    q_emb = model.encode([q], show_progress_bar=False)
-    faiss.normalize_L2(q_emb)
-    D, I = index.search(np.array(q_emb), 10)
-    return [(i, float(D[0][idx])) for idx, i in enumerate(I[0])]
-def hybrid_search(q):
-    tfidf_res = dict(tfidf_search(q))
-    sem_res = dict(semantic_search(q))
-    return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
-def query_expansion_search(q):
-    expanded = q.split()
-    for word in q.split():
-        expanded += list(get_synonyms(word))
-    return tfidf_search(" ".join(expanded))
-def weighted_hybrid(q):
-    tfidf_res = dict(tfidf_search(q))
-    sem_res = dict(semantic_search(q))
-    bm25_res = dict(bm25_search(q))
-    return [(i,
-             0.4 * tfidf_res.get(i, 0) +
-             0.4 * sem_res.get(i, 0) +
-             0.2 * bm25_res.get(i, 0))
-            for i in range(len(products))]
-def ensemble_search(q):
-    tfidf_res = np.array([s for _, s in tfidf_search(q)])
-    sem_res = np.array([s for _, s in semantic_search(q)])
-    bm25_res = np.array([s for _, s in bm25_search(q)])
-    combined = (
-        tfidf_res / (np.max(tfidf_res) + 1e-6) +
-        sem_res / (np.max(sem_res) + 1e-6) +
-        bm25_res / (np.max(bm25_res) + 1e-6)
-    )
-    return list(enumerate(combined))
 # ==============================
 # UI
 # ==============================
-search_type = st.selectbox("🔎 Select Search Type", list(search_info.keys()))
-explanation, example = search_info[search_type]
-st.markdown(f"""
-### 🔍 {search_type}
-- **Explanation:** {explanation}
-- **Example:** `{example}`
-""")
 query = st.text_input("Enter your search query")
 top_k = st.slider("Top Results", 5, 20, 10)
 # ==============================
-# SEARCH EXECUTION
 # ==============================
 if st.button("Search"):
     if not query:
         st.warning("Enter query")
     else:
-        func_map = {
-            "Keyword": keyword_search,
-            "Regex": regex_search,
-            "Boolean": boolean_search,
-            "Fuzzy": fuzzy_search,
-            "N-Gram": ngram_search,
-            "Prefix": prefix_search,
-            "Suffix": suffix_search,
-            "TF-IDF": tfidf_search,
-            "BM25": bm25_search,
-            "Semantic": semantic_search,
-            "FAISS": faiss_search,
-            "Hybrid": hybrid_search,
-            "Query Expansion": query_expansion_search,
-            "Weighted Hybrid": weighted_hybrid,
-            "Ensemble": ensemble_search
-        }
-        results = func_map[search_type](query)
         results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
         # ✅ LOG SEARCH
-        log_activity(st.session_state["user"], "Search", query, search_type)
         indices = [i for i, _ in results]
         result_df = df.iloc[indices].copy()
         result_df["Score"] = [round(score, 4) for _, score in results]
-        st.subheader("🔎 Results")
         st.dataframe(result_df)
 # ==============================
-# SHOW LOGS
 # ==============================
-st.sidebar.subheader("📊 Activity Log")
 if os.path.exists(LOG_FILE):
     log_df = pd.read_csv(LOG_FILE)
     st.sidebar.dataframe(log_df.tail(10))
 else:
-    st.sidebar.write("No activity yet")

 from nltk.corpus import wordnet
 # ==============================
+# LOG FUNCTION (FIXED POSITION)
+# ==============================
+LOG_FILE = "user_logs.csv"
+def log_activity(user, action, query, search_type):
+    log_entry = {
+        "User": user,
+        "Action": action,
+        "Query": query,
+        "Search Type": search_type,
+        "Time": str(pd.Timestamp.now())
+    }
+    if os.path.exists(LOG_FILE):
+        df_log = pd.read_csv(LOG_FILE)
+        df_log = pd.concat([df_log, pd.DataFrame([log_entry])])
+    else:
+        df_log = pd.DataFrame([log_entry])
+    df_log.to_csv(LOG_FILE, index=False)
+# ==============================
+# AUTHENTICATION
 # ==============================
 def login():
     st.title("🔐 Login Required")
     password = st.text_input("Password", type="password")
     if st.button("Login"):
+        try:
+            if (
+                username == st.secrets["USERNAME"] and
+                password == st.secrets["PASSWORD"]
+            ):
+                st.session_state["authenticated"] = True
+                st.session_state["user"] = username
+                st.session_state["login_time"] = pd.Timestamp.now()
+                log_activity(username, "Login Success", "-", "-")
+                st.success("✅ Login successful")
+                st.rerun()
+            else:
+                log_activity(username, "Login Failed", "-", "-")
+                st.error("❌ Invalid credentials")
+        except Exception:
+            st.error("⚠️ Secrets not configured properly")
 # ==============================
 # SESSION CONTROL
 st.set_page_config(page_title="Multi Search Engine", layout="wide")
 st.title("🔍 Advanced Multi-Search Product Engine")
+# Sidebar info
 st.sidebar.success(f"👤 User: {st.session_state['user']}")
 st.sidebar.info(f"🕒 Login: {st.session_state['login_time']}")
 model = load_model()
 # ==============================
 # LOAD DATA
 # ==============================
     st.stop()
 # ==============================
+# PREPROCESS
 # ==============================
 df["combined"] = (
     df["product_name"].astype(str) + " " +
 products = df["combined"].tolist()
 @st.cache_resource
 def preprocess_data(products):
     tfidf = TfidfVectorizer()
     tfidf_matrix = tfidf.fit_transform(products)
+    embeddings = model.encode(products, show_progress_bar=False)
     faiss.normalize_L2(embeddings)
     index = faiss.IndexFlatIP(embeddings.shape[1])
 tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
 # ==============================
+# SEARCH FUNCTIONS
 # ==============================
 def keyword_search(q):
     return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
 def semantic_search(q):
+    q_emb = model.encode([q])
     faiss.normalize_L2(q_emb)
     scores = np.dot(embeddings, q_emb.T).flatten()
     return list(enumerate(scores))
 # ==============================
 # UI
 # ==============================
+search_type = st.selectbox("🔎 Search Type", ["Keyword", "Semantic"])
 query = st.text_input("Enter your search query")
 top_k = st.slider("Top Results", 5, 20, 10)
 # ==============================
+# SEARCH
 # ==============================
 if st.button("Search"):
     if not query:
         st.warning("Enter query")
     else:
+        if search_type == "Keyword":
+            results = keyword_search(query)
+        else:
+            results = semantic_search(query)
         results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
         # ✅ LOG SEARCH
+        log_activity(
+            st.session_state["user"],
+            "Search",
+            query,
+            search_type
+        )
         indices = [i for i, _ in results]
         result_df = df.iloc[indices].copy()
         result_df["Score"] = [round(score, 4) for _, score in results]
         st.dataframe(result_df)
 # ==============================
+# VIEW LOGS
 # ==============================
+st.sidebar.subheader("📊 User Logs")
 if os.path.exists(LOG_FILE):
     log_df = pd.read_csv(LOG_FILE)
     st.sidebar.dataframe(log_df.tail(10))
 else:
+    st.sidebar.write("No logs yet")