Spaces:
Running
Running
Update src/app.py
Browse files- src/app.py +25 -26
src/app.py
CHANGED
|
@@ -32,7 +32,7 @@ st.set_page_config(page_title="Multi Search Engine", layout="wide")
|
|
| 32 |
st.title("π Advanced Multi-Search Product Engine")
|
| 33 |
|
| 34 |
# ==============================
|
| 35 |
-
# LOAD MODEL
|
| 36 |
# ==============================
|
| 37 |
@st.cache_resource
|
| 38 |
def load_model():
|
|
@@ -62,25 +62,19 @@ search_info = {
|
|
| 62 |
}
|
| 63 |
|
| 64 |
# ==============================
|
| 65 |
-
# DATA SOURCE
|
| 66 |
# ==============================
|
| 67 |
-
data_option = st.radio("π Choose Data Source", ["Sample Data", "
|
| 68 |
-
|
| 69 |
-
if data_option == "
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
except Exception as e:
|
| 77 |
-
st.error(f"Error reading file: {e}")
|
| 78 |
-
st.stop()
|
| 79 |
-
else:
|
| 80 |
-
st.warning("⚠️ Upload a file or switch to Sample Data")
|
| 81 |
-
st.stop()
|
| 82 |
|
| 83 |
-
|
| 84 |
df = pd.DataFrame({
|
| 85 |
"product_name": [
|
| 86 |
"iPhone 14 Pro",
|
|
@@ -102,10 +96,18 @@ else:
|
|
| 102 |
st.info("Using sample dataset")
|
| 103 |
|
| 104 |
# ==============================
|
| 105 |
-
# DATA PREVIEW
|
| 106 |
# ==============================
|
| 107 |
st.subheader("π Data Preview")
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# ==============================
|
| 111 |
# COMBINE TEXT
|
|
@@ -120,7 +122,7 @@ df["combined"] = (
|
|
| 120 |
products = df["combined"].tolist()
|
| 121 |
|
| 122 |
# ==============================
|
| 123 |
-
#
|
| 124 |
# ==============================
|
| 125 |
@st.cache_resource
|
| 126 |
def preprocess_data(products):
|
|
@@ -149,9 +151,6 @@ def get_synonyms(word):
|
|
| 149 |
synonyms.add(lemma.name())
|
| 150 |
return synonyms
|
| 151 |
|
| 152 |
-
# ==============================
|
| 153 |
-
# PREPROCESS
|
| 154 |
-
# ==============================
|
| 155 |
with st.spinner("⚙️ Processing data..."):
|
| 156 |
tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
|
| 157 |
|
|
@@ -245,7 +244,7 @@ def ensemble_search(q):
|
|
| 245 |
return sorted(results.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 246 |
|
| 247 |
# ==============================
|
| 248 |
-
# UI
|
| 249 |
# ==============================
|
| 250 |
search_type = st.selectbox("π Select Search Type", list(search_info.keys()))
|
| 251 |
|
|
@@ -294,4 +293,4 @@ if st.button("Search"):
|
|
| 294 |
result_df["Score"] = [score for _, score in results]
|
| 295 |
|
| 296 |
st.subheader("π Results")
|
| 297 |
-
st.dataframe(result_df)
|
|
|
|
| 32 |
st.title("π Advanced Multi-Search Product Engine")
|
| 33 |
|
| 34 |
# ==============================
|
| 35 |
+
# LOAD MODEL
|
| 36 |
# ==============================
|
| 37 |
@st.cache_resource
|
| 38 |
def load_model():
|
|
|
|
| 62 |
}
|
| 63 |
|
| 64 |
# ==============================
|
| 65 |
+
# DATA SOURCE
|
| 66 |
# ==============================
|
| 67 |
+
data_option = st.radio("π Choose Data Source", ["Sample Data", "Default CSV (from repo)"])
|
| 68 |
+
|
| 69 |
+
if data_option == "Default CSV (from repo)":
|
| 70 |
+
try:
|
| 71 |
+
df = pd.read_csv("products_sample.csv")
|
| 72 |
+
st.success("✅ Loaded dataset from repository")
|
| 73 |
+
except:
|
| 74 |
+
st.error("❌ products_sample.csv not found. Using sample data instead.")
|
| 75 |
+
data_option = "Sample Data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
if data_option == "Sample Data":
|
| 78 |
df = pd.DataFrame({
|
| 79 |
"product_name": [
|
| 80 |
"iPhone 14 Pro",
|
|
|
|
| 96 |
st.info("Using sample dataset")
|
| 97 |
|
| 98 |
# ==============================
|
| 99 |
+
# DATA PREVIEW (ROW CONTROL)
|
| 100 |
# ==============================
|
| 101 |
st.subheader("π Data Preview")
|
| 102 |
+
|
| 103 |
+
row_limit = st.selectbox(
|
| 104 |
+
"Select number of rows to view",
|
| 105 |
+
[5, 10, 20, 30, 50, 100],
|
| 106 |
+
index=1
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
st.caption(f"Showing top {row_limit} rows")
|
| 110 |
+
st.dataframe(df.head(row_limit), use_container_width=True)
|
| 111 |
|
| 112 |
# ==============================
|
| 113 |
# COMBINE TEXT
|
|
|
|
| 122 |
products = df["combined"].tolist()
|
| 123 |
|
| 124 |
# ==============================
|
| 125 |
+
# PREPROCESSING
|
| 126 |
# ==============================
|
| 127 |
@st.cache_resource
|
| 128 |
def preprocess_data(products):
|
|
|
|
| 151 |
synonyms.add(lemma.name())
|
| 152 |
return synonyms
|
| 153 |
|
|
|
|
|
|
|
|
|
|
| 154 |
with st.spinner("⚙️ Processing data..."):
|
| 155 |
tfidf, tfidf_matrix, embeddings, index, bm25 = preprocess_data(products)
|
| 156 |
|
|
|
|
| 244 |
return sorted(results.items(), key=lambda x: x[1], reverse=True)[:10]
|
| 245 |
|
| 246 |
# ==============================
|
| 247 |
+
# SEARCH UI
|
| 248 |
# ==============================
|
| 249 |
search_type = st.selectbox("π Select Search Type", list(search_info.keys()))
|
| 250 |
|
|
|
|
| 293 |
result_df["Score"] = [score for _, score in results]
|
| 294 |
|
| 295 |
st.subheader("π Results")
|
| 296 |
+
st.dataframe(result_df, use_container_width=True)
|