import os os.environ["HF_HOME"] = "/tmp/huggingface" os.environ["XDG_CACHE_HOME"] = "/tmp/cache" os.environ["STREAMLIT_CACHE_DIR"] = "/tmp/streamlit_cache" os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" import streamlit as st from PIL import Image import pandas as pd import numpy as np import pickle import streamlit.components.v1 as components import importlib.util import sys from huggingface_hub import hf_hub_download st.set_page_config(page_title="🛍️ Product Recommender", layout="wide") @st.cache_resource def dynamic_import(module_name, repo_id, file_path): local_path = hf_hub_download(repo_id=repo_id, filename=file_path) spec = importlib.util.spec_from_file_location(module_name, local_path) module = importlib.util.module_from_spec(spec) sys.modules[module_name] = module spec.loader.exec_module(module) return module @st.cache_resource def load_resources(): from huggingface_hub import hf_hub_download # 📥 Download remote files embeddings_path = hf_hub_download(repo_id="bobs24/Product_Recommendations", filename="embeddings.npy") image_urls_path = hf_hub_download(repo_id="bobs24/Product_Recommendations", filename="image_urls.pkl") product_data_path = hf_hub_download(repo_id="bobs24/Product_Recommendations", filename="data/product_data.csv") # ✅ Load data files embeddings = np.load(embeddings_path) with open(image_urls_path, "rb") as f: image_urls = pickle.load(f) product_data = pd.read_csv(product_data_path) # 📥 Dynamically import modules feature_extractor = dynamic_import("feature_extractor", "bobs24/Product_Recommendations", "model/feature_extractor.py") faiss_index = dynamic_import("faiss_index", "bobs24/Product_Recommendations", "utils/faiss_index.py") # 🧠 Initialize model + index fe = feature_extractor.FeatureExtractor() index = faiss_index.FaissIndex(dim=embeddings.shape[1]) index.build(embeddings, image_urls) return fe, index, image_urls, product_data fe, index, image_urls, product_data = load_resources() st.title("🛍️ Product Image Recommender") uploaded_file = st.file_uploader("Upload a product image", type=["jpg", "jpeg", "png"]) if uploaded_file: user_img = Image.open(uploaded_file).convert("RGB") st.image(user_img, caption="Uploaded Image", width=250) user_emb = fe.extract(user_img) results = index.search(user_emb, threshold=0.8, k=100) if len(results) > 0: input_image_url = results[0][0] # Get GROUP_ID of uploaded image input_group_id_series = product_data.loc[product_data['IMAGE'] == input_image_url, 'GROUP_ID'] input_group_id = input_group_id_series.values[0] if not input_group_id_series.empty else None # Get PRODUCT_NAME of uploaded image input_product_name_series = product_data.loc[product_data['IMAGE'] == input_image_url, 'PRODUCT_NAME'] input_product_name = input_product_name_series.values[0] if not input_product_name_series.empty else None # st.markdown(f"**GROUP_ID of uploaded image:** `{input_group_id}`") filtered_results = [] for url, sim in results: group_id_series = product_data.loc[product_data['IMAGE'] == url, 'GROUP_ID'] group_id = group_id_series.values[0] if not group_id_series.empty else None product_series = product_data.loc[product_data['IMAGE'] == url, 'PRODUCT_NAME'] product_name = product_series.values[0] if not product_series.empty else None # Rule: if GROUP_ID is None or 0, exclude same product name if (input_group_id is None or input_group_id == 0): if product_name != input_product_name: filtered_results.append((url, sim)) else: if group_id != input_group_id: filtered_results.append((url, sim)) seen_products = set() deduped_results = [] for url, sim in filtered_results: product_series = product_data.loc[product_data['IMAGE'] == url, 'PRODUCT_NAME'] product_name = product_series.values[0] if not product_series.empty else None if product_name and product_name not in seen_products: seen_products.add(product_name) deduped_results.append((url, sim)) top_results = deduped_results[:15] cards_html = "" for url, sim in top_results: brand = product_data.loc[product_data['IMAGE'] == url, 'BRAND_NAME'].values product = product_data.loc[product_data['IMAGE'] == url, 'PRODUCT_NAME'].values brand_name = brand[0] if len(brand) > 0 else "Unknown Brand" product_name = product[0] if len(product) > 0 else "Unknown Product" cards_html += f"""
{product_name}
Similarity: {sim:.2f}