# BookMate / app.py
# Uploaded by sharmithas151005 ("Upload 4 files", commit 3f9b7af, verified)
import streamlit as st
import numpy as np
import pandas as pd
import pickle
import faiss
# ----------------------------
# Load Saved Model Files
# ----------------------------
def _load_pickle(path):
    # Context manager guarantees the file handle is closed; the original
    # used pickle.load(open(...)) and leaked the handle.
    with open(path, "rb") as f:
        return pickle.load(f)

# Mappings between external user/item IDs and matrix row/column indices.
user_to_index = _load_pickle("model/user_to_index.pkl")
item_to_index = _load_pickle("model/item_to_index.pkl")
index_to_item = _load_pickle("model/index_to_item.pkl")

# ALS collaborative-filtering factor matrices and content embeddings.
user_factors = np.load("model/als_user_factors.npy")
item_factors = np.load("model/als_item_factors.npy")
item_embeddings = np.load("model/item_embeddings.npy")

# Book metadata; the recommenders expect "item_key" and "item_title" columns.
meta = pd.read_csv("model/meta_books.csv")

# L2-normalize the embeddings so inner product == cosine similarity,
# then build a flat (exact) FAISS inner-product index over them.
item_embeddings = item_embeddings.astype("float32")
faiss.normalize_L2(item_embeddings)
faiss_index = faiss.IndexFlatIP(item_embeddings.shape[1])
faiss_index.add(item_embeddings)
# ----------------------------
# Helper Functions
# ----------------------------
def recommend_hybrid_for_user(user_id, k=10, alpha=0.7, top_semantic=50):
    """Return up to ``k`` hybrid recommendations for ``user_id``.

    Blends ALS collaborative-filtering scores with content-based semantic
    similarity as ``alpha * cf + (1 - alpha) * semantic``.

    Parameters
    ----------
    user_id : key expected to be present in ``user_to_index``
    k : int, maximum number of recommendations returned
    alpha : float, weight of the CF score (1 - alpha goes to semantic)
    top_semantic : int, FAISS neighbors fetched per seed item

    Returns
    -------
    list of ``(title, score, item_id)`` tuples, best first; empty list
    when the user is unknown.
    """
    if user_id not in user_to_index:
        return []
    uidx = user_to_index[user_id]

    # Collaborative-filtering score for every item.
    cf_scores = user_factors[uidx] @ item_factors.T

    # Items the user already interacted with (excluded from the output).
    # The original wrapped this in a redundant isin() self-join; the
    # filtered "item_key" column already is the set of rated items.
    if "user_id" in meta.columns:
        rated_set = set(meta.loc[meta["user_id"] == user_id, "item_key"])
    else:
        rated_set = set()

    # Semantic scores: accumulate FAISS cosine similarities of the
    # neighbors of the user's top-20 CF items, then scale to [0, 1].
    semantic_scores = np.zeros(len(cf_scores), dtype=np.float32)
    top_cf = np.argsort(-cf_scores)[:20]
    for idx in top_cf:
        D, I = faiss_index.search(item_embeddings[idx].reshape(1, -1), top_semantic)
        semantic_scores[I[0]] += D[0]
    if semantic_scores.max() > 0:
        semantic_scores /= semantic_scores.max()

    hybrid = alpha * cf_scores + (1 - alpha) * semantic_scores

    # Mask already-rated items so they can never rank in the top-k.
    for i in range(len(hybrid)):
        if index_to_item[i] in rated_set:
            hybrid[i] = -np.inf

    recs = []
    for idx in np.argsort(-hybrid)[:k]:
        item_id = index_to_item[idx]
        # Guard against items missing from the metadata table; the
        # original indexed .values[0] unconditionally and would raise
        # IndexError on a missing row.
        titles = meta.loc[meta["item_key"] == item_id, "item_title"].values
        if len(titles) == 0:
            continue
        recs.append((titles[0], float(hybrid[idx]), item_id))
    return recs
def recommend_similar_items(title, k=10):
    """Return up to ``k`` books most similar to ``title``.

    Looks the lower-cased, stripped title up in ``item_to_index``
    (assumes that mapping is keyed by normalized titles — TODO confirm
    against the model-building code), then queries the FAISS index.

    Returns a list of ``(title, cosine_similarity, item_id)`` tuples,
    or an empty list when the title is unknown.
    """
    key = title.lower().strip()
    if key not in item_to_index:
        return []
    idx = item_to_index[key]
    # Search k + 1 neighbors because the nearest hit is the query item
    # itself; the [1:] slices below drop it.
    D, I = faiss_index.search(item_embeddings[idx].reshape(1, -1), k + 1)
    recs = []
    for score, item_idx in zip(D[0][1:], I[0][1:]):
        item_id = index_to_item[item_idx]
        # Guard against items with no metadata row; the original's
        # .values[0] would raise IndexError for them.
        names = meta.loc[meta["item_key"] == item_id, "item_title"].values
        if len(names) == 0:
            continue
        recs.append((names[0], float(score), item_id))
    return recs
# ----------------------------
# Streamlit UI
# ----------------------------
st.title("Hybrid Book Recommendation System")
# Two entry points: personalized recommendations by user ID, and
# content-based similarity lookup by book title.
tab1, tab2 = st.tabs(["πŸ”Ή Recommend for User", "πŸ” Find Similar Books"])
with tab1:
    # NOTE(review): text_input returns a str; if user_to_index is keyed
    # by ints this membership test always fails — confirm key type.
    user_id = st.text_input("Enter User ID:")
    if st.button("Recommend"):
        if user_id in user_to_index:
            # Uses the default k=10, alpha=0.7 hybrid weighting.
            results = recommend_hybrid_for_user(user_id)
            st.subheader("Recommended Books:")
            for title, score, item_id in results:
                st.write(f"**{title}** β€” *score:* {round(score, 4)}")
        else:
            st.error("User ID not found in dataset.")
with tab2:
    book = st.text_input("Enter a Book Title:")
    if st.button("Find Similar"):
        # recommend_similar_items normalizes the title (lower/strip)
        # and returns [] when it is unknown.
        results = recommend_similar_items(book)
        if results:
            st.subheader("Similar Books:")
            for title, score, item_id in results:
                st.write(f"**{title}** β€” *similarity:* {round(score, 4)}")
        else:
            st.error("Book not found or no similar results.")