Spaces:

pradeep4321
/

sample_multi_search

Sleeping

App Files Community

pradeep4321 committed on Apr 1

Commit

fb3391a

verified ·

1 Parent(s): f5f3b3e

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +27 -53

src/app.py CHANGED Viewed

@@ -25,30 +25,27 @@ st.title("🔍 Advanced Multi-Search Product Engine")
 # ==============================
 # LOAD MODEL
 # ==============================
-if "model" not in st.session_state:
-    with st.spinner("Loading AI model..."):
-        st.session_state.model = SentenceTransformer(
-            'all-MiniLM-L6-v2',
-            device='cpu'
-        )
-model = st.session_state.model
 # ==============================
-# SEARCH INFO (UPDATED)
 # ==============================
 search_info = {
     "Keyword": ("Exact match", "iphone"),
     "Regex": ("Pattern match", "^Samsung"),
     "Boolean": ("AND / OR logic", "nike AND shoes"),
     "Fuzzy": ("Spelling mistakes", "iphon"),
-    "N-Gram": ("Partial word", "iph"),
     "Prefix": ("Word starts with", "Sam"),
     "Suffix": ("Word ends with", "phone"),
     "TF-IDF": ("Keyword ranking", "wireless headphones"),
     "BM25": ("Advanced ranking", "gaming laptop"),
     "Semantic": ("Meaning search", "sports footwear"),
-    "FAISS": ("Fast semantic", "music device"),
     "Hybrid": ("TF-IDF + Semantic", "running shoes"),
     "Query Expansion": ("Auto synonyms", "speaker"),
     "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
@@ -56,35 +53,21 @@ search_info = {
 }
 # ==============================
-# FILE LOAD (KEEP YOUR LOGIC)
 # ==============================
-#uploaded_file = st.file_uploader("Upload CSV", type=["csv"])
-if uploaded_file:
-    df = pd.read_csv("src/products_10k.csv")
-else:
-    st.info("Using sample dataset")
-    df = pd.DataFrame({
-        "product_name": [
-            "iPhone 14 Pro",
-            "Samsung Galaxy S23",
-            "Nike Running Shoes",
-            "Dell Gaming Laptop",
-            "Bluetooth Speaker"
-        ],
-        "category": ["Mobile", "Mobile", "Footwear", "Laptop", "Electronics"],
-        "brand": ["Apple", "Samsung", "Nike", "Dell", "JBL"],
-        "description": [
-            "Latest smartphone",
-            "Android flagship phone",
-            "Comfort sports shoes",
-            "High performance laptop",
-            "Portable music device"
-        ]
-    })
 # ==============================
-# DATA PREVIEW CONTROL
 # ==============================
 st.subheader("📄 Data Preview")
@@ -106,7 +89,7 @@ products = df["combined"].tolist()
 # ==============================
 # PREPROCESS
 # ==============================
-@st.cache(allow_output_mutation=True)
 def preprocess_data(products):
     tfidf = TfidfVectorizer()
     tfidf_matrix = tfidf.fit_transform(products)
@@ -159,14 +142,13 @@ def fuzzy_search(q):
     return sorted(scores, key=lambda x: x[1], reverse=True)
 def ngram_search(q):
-    return [(i, 1) for i, p in enumerate(products) if q.lower() in p.lower()]
-# ✅ FIXED PREFIX (word-level)
 def prefix_search(q):
     return [(i, 1) for i, p in enumerate(products)
             if any(word.startswith(q.lower()) for word in p.lower().split())]
-# ✅ FIXED SUFFIX (word-level)
 def suffix_search(q):
     return [(i, 1) for i, p in enumerate(products)
             if any(word.endswith(q.lower()) for word in p.lower().split())]
@@ -197,14 +179,12 @@ def hybrid_search(q):
     sem_res = dict(semantic_search(q))
     return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
-# ✅ IMPROVED QUERY EXPANSION
 def query_expansion_search(q):
     expanded = q.split()
     for word in q.split():
         expanded += list(get_synonyms(word))
     return tfidf_search(" ".join(expanded))
-# ✅ IMPROVED WEIGHTED HYBRID
 def weighted_hybrid(q):
     tfidf_res = dict(tfidf_search(q))
     sem_res = dict(semantic_search(q))
@@ -216,15 +196,16 @@ def weighted_hybrid(q):
              0.2 * bm25_res.get(i, 0))
             for i in range(len(products))]
-# ✅ FIXED ENSEMBLE (NORMALIZED)
 def ensemble_search(q):
     tfidf_res = np.array([s for _, s in tfidf_search(q)])
     sem_res = np.array([s for _, s in semantic_search(q)])
     bm25_res = np.array([s for _, s in bm25_search(q)])
-    combined = tfidf_res/np.max(tfidf_res+1e-6) + \
-               sem_res/np.max(sem_res+1e-6) + \
-               bm25_res/np.max(bm25_res+1e-6)
     return list(enumerate(combined))
@@ -241,11 +222,6 @@ st.markdown(f"""
 """)
 query = st.text_input("Enter your search query")
-if st.button("Try Example"):
-    query = example
-    st.success(f"Loaded: {query}")
 top_k = st.slider("Top Results", 5, 20, 10)
 # ==============================
@@ -274,8 +250,6 @@ if st.button("Search"):
         }
         results = func_map[search_type](query)
-        # Sort results
         results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
         indices = [i for i, _ in results]

 # ==============================
 # LOAD MODEL
 # ==============================
+@st.cache_resource
+def load_model():
+    return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+model = load_model()
 # ==============================
+# SEARCH INFO
 # ==============================
 search_info = {
     "Keyword": ("Exact match", "iphone"),
     "Regex": ("Pattern match", "^Samsung"),
     "Boolean": ("AND / OR logic", "nike AND shoes"),
     "Fuzzy": ("Spelling mistakes", "iphon"),
+    "N-Gram": ("Partial word match", "iph"),
     "Prefix": ("Word starts with", "Sam"),
     "Suffix": ("Word ends with", "phone"),
     "TF-IDF": ("Keyword ranking", "wireless headphones"),
     "BM25": ("Advanced ranking", "gaming laptop"),
     "Semantic": ("Meaning search", "sports footwear"),
+    "FAISS": ("Fast semantic search", "music device"),
     "Hybrid": ("TF-IDF + Semantic", "running shoes"),
     "Query Expansion": ("Auto synonyms", "speaker"),
     "Weighted Hybrid": ("TF-IDF + Semantic + BM25", "best laptop"),
 }
 # ==============================
+# LOAD DATA
 # ==============================
+try:
+    df = pd.read_csv("products_10k.csv")
+    st.success("✅ Data loaded successfully")
+except Exception as e:
+    st.error(f"❌ Error loading file: {e}")
+    st.stop()
+if df.empty:
+    st.error("Dataset is empty!")
+    st.stop()
 # ==============================
+# DATA PREVIEW
 # ==============================
 st.subheader("📄 Data Preview")
 # ==============================
 # PREPROCESS
 # ==============================
+@st.cache_resource
 def preprocess_data(products):
     tfidf = TfidfVectorizer()
     tfidf_matrix = tfidf.fit_transform(products)
     return sorted(scores, key=lambda x: x[1], reverse=True)
 def ngram_search(q):
+    return [(i, 1) for i, p in enumerate(products)
+            if any(q.lower() in word for word in p.lower().split())]
 def prefix_search(q):
     return [(i, 1) for i, p in enumerate(products)
             if any(word.startswith(q.lower()) for word in p.lower().split())]
 def suffix_search(q):
     return [(i, 1) for i, p in enumerate(products)
             if any(word.endswith(q.lower()) for word in p.lower().split())]
     sem_res = dict(semantic_search(q))
     return [(i, tfidf_res.get(i, 0) + sem_res.get(i, 0)) for i in range(len(products))]
 def query_expansion_search(q):
     expanded = q.split()
     for word in q.split():
         expanded += list(get_synonyms(word))
     return tfidf_search(" ".join(expanded))
 def weighted_hybrid(q):
     tfidf_res = dict(tfidf_search(q))
     sem_res = dict(semantic_search(q))
              0.2 * bm25_res.get(i, 0))
             for i in range(len(products))]
 def ensemble_search(q):
     tfidf_res = np.array([s for _, s in tfidf_search(q)])
     sem_res = np.array([s for _, s in semantic_search(q)])
     bm25_res = np.array([s for _, s in bm25_search(q)])
+    combined = (
+        tfidf_res / (np.max(tfidf_res) + 1e-6) +
+        sem_res / (np.max(sem_res) + 1e-6) +
+        bm25_res / (np.max(bm25_res) + 1e-6)
+    )
     return list(enumerate(combined))
 """)
 query = st.text_input("Enter your search query")
 top_k = st.slider("Top Results", 5, 20, 10)
 # ==============================
         }
         results = func_map[search_type](query)
         results = sorted(results, key=lambda x: x[1], reverse=True)[:top_k]
         indices = [i for i, _ in results]