# BookMate / app.py
# Uploaded by sharmithas151005 ("Upload 4 files", commit 3f9b7af, verified)
import streamlit as st
import numpy as np
import pandas as pd
import pickle
import faiss
# ----------------------------
# Load Saved Model Files
# ----------------------------
def _load_pickle(path):
    # Context manager guarantees the file handle is closed; the original
    # used pickle.load(open(...)) and leaked the handle.
    with open(path, "rb") as f:
        return pickle.load(f)

# Mappings between external user/item IDs and matrix row/column indices.
user_to_index = _load_pickle("model/user_to_index.pkl")
item_to_index = _load_pickle("model/item_to_index.pkl")
index_to_item = _load_pickle("model/index_to_item.pkl")

# ALS collaborative-filtering factor matrices and content embeddings.
user_factors = np.load("model/als_user_factors.npy")
item_factors = np.load("model/als_item_factors.npy")
item_embeddings = np.load("model/item_embeddings.npy")

# Book metadata; the recommenders expect "item_key" and "item_title" columns.
meta = pd.read_csv("model/meta_books.csv")

# L2-normalize the embeddings so inner product == cosine similarity,
# then build a flat (exact) FAISS inner-product index over them.
item_embeddings = item_embeddings.astype("float32")
faiss.normalize_L2(item_embeddings)
faiss_index = faiss.IndexFlatIP(item_embeddings.shape[1])
faiss_index.add(item_embeddings)
# ----------------------------
# Helper Functions
# ----------------------------
def recommend_hybrid_for_user(user_id, k=10, alpha=0.7, top_semantic=50):
    """Return up to ``k`` hybrid recommendations for ``user_id``.

    Blends ALS collaborative-filtering scores with content-based semantic
    similarity as ``alpha * cf + (1 - alpha) * semantic``.

    Parameters
    ----------
    user_id : key expected to be present in ``user_to_index``
    k : int, maximum number of recommendations returned
    alpha : float, weight of the CF score (1 - alpha goes to semantic)
    top_semantic : int, FAISS neighbors fetched per seed item

    Returns
    -------
    list of ``(title, score, item_id)`` tuples, best first; empty list
    when the user is unknown.
    """
    if user_id not in user_to_index:
        return []
    uidx = user_to_index[user_id]

    # Collaborative-filtering score for every item.
    cf_scores = user_factors[uidx] @ item_factors.T

    # Items the user already interacted with (excluded from the output).
    # The original wrapped this in a redundant isin() self-join; the
    # filtered "item_key" column already is the set of rated items.
    if "user_id" in meta.columns:
        rated_set = set(meta.loc[meta["user_id"] == user_id, "item_key"])
    else:
        rated_set = set()

    # Semantic scores: accumulate FAISS cosine similarities of the
    # neighbors of the user's top-20 CF items, then scale to [0, 1].
    semantic_scores = np.zeros(len(cf_scores), dtype=np.float32)
    top_cf = np.argsort(-cf_scores)[:20]
    for idx in top_cf:
        D, I = faiss_index.search(item_embeddings[idx].reshape(1, -1), top_semantic)
        semantic_scores[I[0]] += D[0]
    if semantic_scores.max() > 0:
        semantic_scores /= semantic_scores.max()

    hybrid = alpha * cf_scores + (1 - alpha) * semantic_scores

    # Mask already-rated items so they can never rank in the top-k.
    for i in range(len(hybrid)):
        if index_to_item[i] in rated_set:
            hybrid[i] = -np.inf

    recs = []
    for idx in np.argsort(-hybrid)[:k]:
        item_id = index_to_item[idx]
        # Guard against items missing from the metadata table; the
        # original indexed .values[0] unconditionally and would raise
        # IndexError on a missing row.
        titles = meta.loc[meta["item_key"] == item_id, "item_title"].values
        if len(titles) == 0:
            continue
        recs.append((titles[0], float(hybrid[idx]), item_id))
    return recs
def recommend_similar_items(title, k=10):
    """Return up to ``k`` books most similar to ``title``.

    Looks the lower-cased, stripped title up in ``item_to_index``
    (assumes that mapping is keyed by normalized titles — TODO confirm
    against the model-building code), then queries the FAISS index.

    Returns a list of ``(title, cosine_similarity, item_id)`` tuples,
    or an empty list when the title is unknown.
    """
    key = title.lower().strip()
    if key not in item_to_index:
        return []
    idx = item_to_index[key]
    # Search k + 1 neighbors because the nearest hit is the query item
    # itself; the [1:] slices below drop it.
    D, I = faiss_index.search(item_embeddings[idx].reshape(1, -1), k + 1)
    recs = []
    for score, item_idx in zip(D[0][1:], I[0][1:]):
        item_id = index_to_item[item_idx]
        # Guard against items with no metadata row; the original's
        # .values[0] would raise IndexError for them.
        names = meta.loc[meta["item_key"] == item_id, "item_title"].values
        if len(names) == 0:
            continue
        recs.append((names[0], float(score), item_id))
    return recs
# ----------------------------
# Streamlit UI
# ----------------------------
st.title("Hybrid Book Recommendation System")
# Two entry points: personalized recommendations by user ID, and
# content-based similarity lookup by book title.
tab1, tab2 = st.tabs(["πŸ”Ή Recommend for User", "πŸ” Find Similar Books"])
with tab1:
    # NOTE(review): text_input returns a str; if user_to_index is keyed
    # by ints this membership test always fails — confirm key type.
    user_id = st.text_input("Enter User ID:")
    if st.button("Recommend"):
        if user_id in user_to_index:
            # Uses the default k=10, alpha=0.7 hybrid weighting.
            results = recommend_hybrid_for_user(user_id)
            st.subheader("Recommended Books:")
            for title, score, item_id in results:
                st.write(f"**{title}** β€” *score:* {round(score, 4)}")
        else:
            st.error("User ID not found in dataset.")
with tab2:
    book = st.text_input("Enter a Book Title:")
    if st.button("Find Similar"):
        # recommend_similar_items normalizes the title (lower/strip)
        # and returns [] when it is unknown.
        results = recommend_similar_items(book)
        if results:
            st.subheader("Similar Books:")
            for title, score, item_id in results:
                st.write(f"**{title}** β€” *similarity:* {round(score, 4)}")
        else:
            st.error("Book not found or no similar results.")