Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| import faiss | |
# ----------------------------
# Load Saved Model Files
# ----------------------------
def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file afterwards."""
    # NOTE(security): unpickling can execute arbitrary code, so these files
    # must come from a trusted build of the model, never from user uploads.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Lookup tables saved alongside the model. As used further down in this file:
#   user_to_index[user_id] -> row of user_factors
#   item_to_index[key]     -> row of item_embeddings
#   index_to_item[row]     -> item key that is matched against meta["item_key"]
user_to_index = _load_pickle("model/user_to_index.pkl")
item_to_index = _load_pickle("model/item_to_index.pkl")
index_to_item = _load_pickle("model/index_to_item.pkl")

# Factor matrices (file names suggest they come from an ALS model) and item embeddings
user_factors = np.load("model/als_user_factors.npy")
item_factors = np.load("model/als_item_factors.npy")
item_embeddings = np.load("model/item_embeddings.npy")

# Item metadata; the helpers below read the "item_key" and "item_title" columns
meta = pd.read_csv("model/meta_books.csv")

# Ensure normalized embeddings for FAISS search: FAISS works on float32, and
# after in-place L2 normalisation the inner-product index ranks by cosine similarity.
item_embeddings = item_embeddings.astype("float32")
faiss.normalize_L2(item_embeddings)
faiss_index = faiss.IndexFlatIP(item_embeddings.shape[1])
faiss_index.add(item_embeddings)
# ----------------------------
# Helper Functions
# ----------------------------
def recommend_hybrid_for_user(user_id, k=10, alpha=0.7, top_semantic=50):
    """Return up to ``k`` recommendations for ``user_id`` blending ALS and embedding scores.

    Every item gets a collaborative-filtering score (user factor dotted with
    the item factors) and a "semantic" score: each of the user's 20
    highest-CF items votes for its ``top_semantic`` nearest neighbours in the
    FAISS index, weighted by similarity, and the votes are scaled so the
    largest equals 1. Items the user already rated are excluded, which is
    only possible when ``meta`` carries a ``user_id`` column.

    Assumes rows of ``item_factors`` and ``item_embeddings`` share one item
    index space (the code indexes both with the same positions).

    Args:
        user_id: Key looked up in ``user_to_index``.
        k: Maximum number of recommendations to return.
        alpha: Weight of the CF score; ``1 - alpha`` weights the semantic score.
        top_semantic: Neighbours requested from FAISS per seed item.

    Returns:
        A list of ``(title, score, item_id)`` tuples, best first. Empty when
        the user is unknown or there is nothing left to recommend.
    """
    if user_id not in user_to_index:
        return []
    uidx = user_to_index[user_id]

    # ALS scores: one value per item
    cf_scores = user_factors[uidx] @ item_factors.T
    n_items = len(cf_scores)
    if n_items == 0:
        return []

    # Items user already rated (unknown unless meta has per-user rows)
    if "user_id" in meta.columns:
        rated_set = set(meta.loc[meta["user_id"] == user_id, "item_key"])
    else:
        rated_set = set()

    # Semantic scores: similarity-weighted votes from the top CF items
    semantic_scores = np.zeros(n_items, dtype=np.float32)
    # Never ask for more neighbours than the index holds: FAISS pads the
    # result with label -1, which would otherwise index the *last* item.
    n_neighbors = min(top_semantic, faiss_index.ntotal)
    if n_neighbors > 0:
        seeds = np.argsort(-cf_scores)[:20]
        sims, nbrs = faiss_index.search(item_embeddings[seeds], n_neighbors)
        sims, nbrs = sims.ravel(), nbrs.ravel()
        valid = (nbrs >= 0) & (nbrs < n_items)
        # add.at accumulates repeated indices (an item can neighbour several seeds)
        np.add.at(semantic_scores, nbrs[valid], sims[valid])

    peak = semantic_scores.max()
    if peak > 0:
        semantic_scores /= peak

    # NOTE(review): cf_scores are used raw while semantic scores are scaled
    # to at most 1, so alpha blends values on different scales; confirm intended.
    hybrid = alpha * cf_scores + (1 - alpha) * semantic_scores

    # Mask rated items so they sort to the very end
    if rated_set:
        rated_mask = np.fromiter(
            (index_to_item[i] in rated_set for i in range(n_items)),
            dtype=bool,
            count=n_items,
        )
        hybrid[rated_mask] = -np.inf

    recs = []
    for idx in np.argsort(-hybrid):
        if len(recs) >= k:
            break
        if hybrid[idx] == -np.inf:
            # Only masked items remain; never recommend something already rated
            break
        item_id = index_to_item[idx]
        titles = meta.loc[meta["item_key"] == item_id, "item_title"].values
        # Fall back to the key itself when the item has no metadata row
        title = titles[0] if len(titles) else str(item_id)
        recs.append((title, float(hybrid[idx]), item_id))
    return recs
def recommend_similar_items(title, k=10):
    """Return up to ``k`` items whose embeddings are most similar to the given one.

    The input is lower-cased and stripped before being looked up in
    ``item_to_index``.
    NOTE(review): this presumes the keys of ``item_to_index`` are normalised
    titles; confirm against how the mapping was built.

    Args:
        title: Text identifying the query item.
        k: Maximum number of similar items to return.

    Returns:
        A list of ``(title, similarity, item_id)`` tuples, most similar first.
        Empty when the query is unknown or ``k`` is not positive.
    """
    title = title.lower().strip()
    if title not in item_to_index:
        return []
    if k <= 0:
        return []
    idx = item_to_index[title]

    # Request one extra neighbour because the query item normally matches
    # itself, but never more than the index holds (FAISS pads with -1).
    n_neighbors = min(k + 1, faiss_index.ntotal)
    if n_neighbors <= 0:
        return []
    D, I = faiss_index.search(item_embeddings[idx].reshape(1, -1), n_neighbors)

    recs = []
    for score, item_idx in zip(D[0], I[0]):
        # Drop the query by index, not by position: with duplicate embeddings
        # it is not guaranteed to be the first hit.
        if item_idx < 0 or item_idx == idx:
            continue
        if len(recs) >= k:
            break
        item_id = index_to_item[item_idx]
        names = meta.loc[meta["item_key"] == item_id, "item_title"].values
        # Fall back to the key itself when the item has no metadata row
        name = names[0] if len(names) else str(item_id)
        recs.append((name, float(score), item_id))
    return recs
# ----------------------------
# Streamlit UI
# ----------------------------
st.title("Hybrid Book Recommendation System")

# NOTE(review): the tab icons and the dash in the result lines were mis-encoded
# (UTF-8 bytes shown as ISO-8859-7). The first icon decodes unambiguously; the
# second icon and the dash are best guesses, so confirm against the original app.
tab1, tab2 = st.tabs(["🔹 Recommend for User", "📚 Find Similar Books"])

with tab1:
    # Strip stray whitespace so a pasted ID still matches.
    # NOTE(review): text_input yields a string; if user_to_index is keyed by
    # ints this lookup can never succeed, so verify the key type.
    user_id = st.text_input("Enter User ID:").strip()
    if st.button("Recommend"):
        if user_id in user_to_index:
            results = recommend_hybrid_for_user(user_id)
            st.subheader("Recommended Books:")
            for title, score, _item_id in results:
                st.write(f"**{title}** — *score:* {round(score, 4)}")
        else:
            st.error("User ID not found in dataset.")

with tab2:
    book = st.text_input("Enter a Book Title:")
    if st.button("Find Similar"):
        # recommend_similar_items normalises the title itself
        results = recommend_similar_items(book)
        if results:
            st.subheader("Similar Books:")
            for title, score, _item_id in results:
                st.write(f"**{title}** — *similarity:* {round(score, 4)}")
        else:
            st.error("Book not found or no similar results.")