Spaces:

Abdallah4Zain
/

TasteEngine

Sleeping

App Files Files Community

TasteEngine / app.py

Abdallah4z

Production readiness: port 7860, Dockerfile, HF Spaces config

f9ade9b 15 days ago

raw

history blame contribute delete

19.9 kB

	import os
	import sys
	import logging
	import traceback

	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

	os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
	os.environ.setdefault("OMP_NUM_THREADS", "1")

	import json as json_module
	import numpy as np
	import pandas as pd
	from flask import Flask, render_template, jsonify, request, Response, stream_with_context
	from utils.helpers import load_data, get_user_preferences
	from recommender.collaborative import CollaborativeFiltering
	from recommender.content_based import ContentBasedRecommender
	from recommender.knowledge_based import KnowledgeBasedRecommender
	from recommender.evaluation import Evaluator
	from recommender.explainer import Explainer

	# Configure logging once at import time: INFO level with timestamped lines.
	logging.basicConfig(
	level=logging.INFO,
	format="%(asctime)s [%(levelname)s] %(message)s",
	datefmt="%Y-%m-%d %H:%M:%S",
	)
	# Module-level logger used by every handler in this file.
	logger = logging.getLogger(__name__)

	# The Flask application; all routes below are registered on this object.
	app = Flask(__name__)
	# Take the secret key from FLASK_SECRET_KEY when it is set. Otherwise fall
	# back to a random 24-byte key (hex-encoded) generated at import, so the
	# fallback differs on every process start.
	app.secret_key = os.environ.get(
	"FLASK_SECRET_KEY",
	os.urandom(24).hex()
	)

	# Load the three datasets once at import and build one recommender per
	# approach on the full data; the recommendation endpoints use cf/cb/kb.
	products, users, ratings = load_data()
	cf = CollaborativeFiltering(ratings)
	cb = ContentBasedRecommender(products)
	kb = KnowledgeBasedRecommender(products)
	explainer = Explainer(products, users)
	evaluator = Evaluator(ratings)

	# Fixed 80/20 split of the ratings (seed 42): TRAIN is the sampled 80%,
	# TEST is every remaining row.
	TRAIN = ratings.sample(frac=0.8, random_state=42)
	TEST = ratings.drop(TRAIN.index)
	# Collaborative-filtering model built from TRAIN only; the evaluation
	# endpoints score it against TEST.
	cf_train = CollaborativeFiltering(TRAIN)

	def _split_csv(value):
	    """Split a comma-separated string into a list; non-strings yield []."""
	    if not isinstance(value, str):
	        return []
	    return value.split(",")


	# Sorted list of every known user id.
	USER_IDS = sorted(users["user_id"].tolist())

	# One plain-Python dict per user row, in table order, for templates and
	# the JSON API. Numeric columns are cast to built-in int/float.
	USER_OPTIONS = [
	    {
	        "id": int(record["user_id"]),
	        "name": record["name"],
	        "age": int(record["age"]),
	        "categories": _split_csv(record["preferred_categories"]),
	        "budget_min": float(record["budget_min"]),
	        "budget_max": float(record["budget_max"]),
	        "brands": _split_csv(record["favorite_brands"]),
	    }
	    for _, record in users.iterrows()
	]

	# Distinct catalogue categories and brands, sorted for stable display.
	CATEGORIES = sorted(products["category"].unique().tolist())
	BRANDS = sorted(products["brand"].unique().tolist())

	def _method_entries(*pairs):
	    """Build the ``methods`` list from ``(id, label)`` pairs, keeping order."""
	    return [{"id": method_id, "label": text} for method_id, text in pairs]


	# Catalogue of recommendation approaches offered by the UI. Each approach
	# has a display label, an icon and the ordered list of its methods.
	APPROACHES = {
	    "cf": {
	        "label": "Collaborative Filtering",
	        "icon": "🤝",
	        "methods": _method_entries(
	            ("user_based", "User-Based CF"),
	            ("item_based", "Item-Based CF"),
	            ("svd", "SVD (Matrix Factorization)"),
	            ("knn", "KNN-Based CF"),
	            ("slope_one", "Slope One"),
	        ),
	    },
	    "content": {
	        "label": "Content-Based",
	        "icon": "🏷️",
	        "methods": _method_entries(
	            ("tfidf", "TF-IDF Similarity"),
	            ("feature_match", "Feature Matching"),
	        ),
	    },
	    "knowledge": {
	        "label": "Knowledge-Based",
	        "icon": "⚙️",
	        "methods": _method_entries(
	            ("constraint", "Constraint-Based"),
	            ("rule", "Rule-Based"),
	            ("utility", "Utility-Based"),
	        ),
	    },
	}

	def _prewarm_caches():
	    """Run both training generators of ``cf_train`` to completion.

	    Each generator is drained without storing its progress items. A factory
	    that returns ``None`` is skipped, mirroring the guard used by the
	    streaming evaluation endpoint. A failure in one warm-up is logged and
	    does not prevent the other from running.
	    """
	    warmers = (
	        ("SVD", cf_train.train_svd_generator),
	        ("Slope One", cf_train.compute_slope_one_dev_generator),
	    )
	    for label, make_generator in warmers:
	        try:
	            progress = make_generator()
	            if progress is None:
	                continue
	            for _ in progress:
	                pass
	        except Exception:
	            # This runs in a background thread: without logging here the
	            # failure would only surface as an unhandled-thread traceback.
	            logger.exception("Pre-warming %s cache failed", label)


	# Opt-in warm-up: only when PREWARM_CACHE=1, and in a daemon thread so that
	# application start-up is not blocked by the training work.
	if os.environ.get("PREWARM_CACHE", "0") == "1":
	    import threading
	    logger.info("Pre-warming SVD and Slope One caches...")
	    threading.Thread(target=_prewarm_caches, daemon=True).start()


	def get_product_info(product_id):
	    """Return a JSON-friendly dict for one product, or None if unknown.

	    The first catalogue row whose ``product_id`` equals *product_id* is
	    used; numeric fields are cast to built-in ``int``/``float``.
	    """
	    matches = products.loc[products["product_id"] == product_id]
	    if len(matches) == 0:
	        return None

	    record = matches.iloc[0]
	    info = {
	        "id": int(record["product_id"]),
	        "name": record["name"],
	        "category": record["category"],
	        "subcategory": record["subcategory"],
	        "brand": record["brand"],
	        "price": float(record["price"]),
	        "avg_rating": float(record["avg_rating"]),
	        "num_reviews": int(record["num_reviews"]),
	    }
	    return info


	@app.route("/health")
	def health():
	    """Report service status together with the user and product counts."""
	    payload = {
	        "status": "ok",
	        "users": len(USER_OPTIONS),
	        "products": len(products),
	    }
	    return jsonify(payload)


	def _render_page(template_name, active_page):
	    """Render *template_name* with the context shared by every HTML page.

	    Args:
	        template_name: template file to render.
	        active_page: value passed to the template as ``active_page``.
	    """
	    return render_template(
	        template_name,
	        active_page=active_page,
	        users=USER_OPTIONS,
	        categories=CATEGORIES,
	        brands=BRANDS,
	        approaches=APPROACHES,
	    )


	@app.route("/")
	def index():
	    """Serve the home page."""
	    return _render_page("index.html", "home")


	@app.route("/recommend")
	def recommend_page():
	    """Serve the recommendation page."""
	    return _render_page("recommend.html", "recommend")


	@app.route("/evaluate")
	def evaluate_page():
	    """Serve the evaluation page."""
	    return _render_page("evaluation.html", "evaluate")


	@app.route("/api/users")
	def api_users():
	    """Return every entry of ``USER_OPTIONS`` as a JSON array."""
	    response = jsonify(USER_OPTIONS)
	    return response


	@app.route("/api/user/<int:user_id>")
	def api_user(user_id):
	    """Return the stored preferences of one user as JSON.

	    Args:
	        user_id: numeric id taken from the URL.

	    The preferences come from ``get_user_preferences``. Other handlers in
	    this file treat ``preferred_categories`` and ``favorite_brands`` as sets
	    (they default to ``set()`` and are wrapped in ``list()``), and a set is
	    not JSON serialisable, so set values are converted to sorted lists here.
	    Non-set values are passed through untouched.
	    """
	    prefs = get_user_preferences(users, user_id)
	    if isinstance(prefs, dict):
	        prefs = {
	            # key=str keeps the ordering well-defined for mixed-type members.
	            key: sorted(value, key=str) if isinstance(value, (set, frozenset)) else value
	            for key, value in prefs.items()
	        }
	    return jsonify(prefs)


	@app.route("/api/products")
	def api_products():
	    """Return the product list as JSON, optionally limited to one category.

	    Query parameters:
	        category: when present and non-empty, only products whose category
	            equals it are returned.
	    """
	    wanted_category = request.args.get("category")
	    selection = (
	        products[products["category"] == wanted_category]
	        if wanted_category
	        else products
	    )
	    payload = [
	        get_product_info(record["product_id"])
	        for _, record in selection.iterrows()
	    ]
	    return jsonify(payload)


	def get_user_rated_items(user_id):
	    """Return the product ids that *user_id* rated 3.5 or higher."""
	    by_user = ratings["user_id"] == user_id
	    liked = ratings["rating"] >= 3.5
	    return ratings.loc[by_user & liked, "product_id"].tolist()


	def _explained_recommendations(recs, explain):
	    """Turn ``(product_id, score)`` pairs into response dicts.

	    Args:
	        recs: iterable of ``(product_id, score)`` pairs from a recommender.
	        explain: callable ``(product_id, score) -> explanation``.

	    Returns:
	        A list of product-info dicts extended with ``score`` (rounded to 4
	        decimals) and ``explanation``. Pairs whose product id is not found
	        in the catalogue are skipped, as in the HTMX endpoint.
	    """
	    items = []
	    for pid, score in recs:
	        product = get_product_info(pid)
	        if product is None:
	            # Unknown id: dropping the single entry is better than failing
	            # the whole request on ``{**None}``.
	            continue
	        items.append({
	            **product,
	            "score": round(score, 4),
	            "explanation": explain(pid, score),
	        })
	    return items


	@app.route("/api/recommend", methods=["POST"])
	def api_recommend():
	    """Return explained recommendations for one user as JSON.

	    Expects a JSON object with ``user_id``, ``approach`` (``"cf"``,
	    ``"content"`` or ``"knowledge"``), ``method`` and optionally ``n``
	    (number of recommendations, default 10).

	    Returns:
	        ``{"recommendations": [...]}`` on success; a 400 JSON error when a
	        required field is missing or the approach is unknown; a 500 JSON
	        error when the recommender or explainer raises.
	    """
	    # silent=True: a missing or malformed body becomes None instead of an
	    # exception, and is then reported as missing parameters below.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        data = {}

	    user_id = data.get("user_id")
	    approach = data.get("approach")
	    method = data.get("method")
	    n_recs = data.get("n", 10)

	    if not user_id or not approach or not method:
	        return jsonify({"error": "Missing required parameters"}), 400
	    if approach not in ("cf", "content", "knowledge"):
	        return jsonify({"error": f"Unknown approach: {approach}"}), 400

	    try:
	        # Looked up inside the try so a failure here is logged and returned
	        # as the JSON 500 below rather than escaping the handler.
	        user_rated = get_user_rated_items(user_id)
	        prefs = get_user_preferences(users, user_id)

	        if approach == "cf":
	            recs = cf.recommend(method, user_id, n_recommendations=n_recs)

	            def explain(pid, score):
	                # "count" is a fixed value handed to the explainer.
	                details = {"sim_score": score, "count": 10}
	                return explainer.explain_cf(method, user_id, pid, details)

	        elif approach == "content":
	            recs = cb.recommend(
	                method,
	                user_profile_items=user_rated,
	                preferences=prefs,
	                n_recommendations=n_recs,
	            )

	            def explain(pid, score):
	                return explainer.explain_content(method, user_id, pid, {"score": score})

	        else:  # "knowledge"
	            constraints = {
	                "budget_min": prefs.get("budget_min", 0),
	                "budget_max": prefs.get("budget_max", 999999),
	                "category": list(prefs.get("preferred_categories", set())),
	                "brand": list(prefs.get("favorite_brands", set())),
	            }
	            context = {
	                "interacted_category": "",
	                "preferred_categories": prefs.get("preferred_categories", set()),
	                "budget_min": prefs.get("budget_min", 0),
	                "budget_max": prefs.get("budget_max", 999999),
	                "favorite_brands": prefs.get("favorite_brands", set()),
	            }
	            recs = kb.recommend(
	                method,
	                constraints=constraints,
	                context=context,
	                preferences=prefs,
	                n_recommendations=n_recs,
	            )

	            def explain(pid, score):
	                details = {
	                    "score": score,
	                    "budget_max": prefs.get("budget_max", 0),
	                    "trigger_item": "",
	                }
	                return explainer.explain_knowledge(method, user_id, pid, details)

	        return jsonify({"recommendations": _explained_recommendations(recs, explain)})

	    except Exception as e:
	        logger.error("Recommendation error: %s\n%s", e, traceback.format_exc())
	        return jsonify({"error": "An internal error occurred while generating recommendations"}), 500


	# Short display label for every collaborative-filtering method id.
	CF_METHOD_LABELS = dict(
	    user_based="User-Based",
	    item_based="Item-Based",
	    svd="SVD",
	    knn="KNN",
	    slope_one="Slope One",
	)
	# Accepted method ids, in the same order as the labels above.
	CF_METHOD_NAMES = list(CF_METHOD_LABELS)

	@app.route("/api/evaluate/cf/<method>")
	def api_evaluate_cf(method):
	    """Evaluate one collaborative-filtering method on the held-out ratings.

	    Unknown method ids get a 400 JSON error. A failing evaluation is logged
	    and reported as ``{"method": ..., "error": ...}`` with the default
	    status code.
	    """
	    if method in CF_METHOD_NAMES:
	        try:
	            outcome = evaluator.evaluate_cf_method(method, cf_train, TEST, k=5)
	            return jsonify(outcome)
	        except Exception as exc:
	            logger.error(
	                "CF evaluation error [%s]: %s\n%s",
	                method, exc, traceback.format_exc(),
	            )
	            failure = {"method": method, "error": "Evaluation failed"}
	            return jsonify(failure)

	    return jsonify({"error": f"Unknown CF method: {method}"}), 400


	@app.route("/api/evaluate/cf/<method>/stream")
	def api_evaluate_cf_stream(method):
	    """Stream training progress and the evaluation result as NDJSON.

	    Only ``svd`` and ``slope_one`` are accepted; anything else gets a 400
	    JSON error. The stream carries one JSON object per line: ``progress``
	    records while the training generator runs, one ``phase`` record, then
	    either a ``result`` record or, on failure, an ``error`` record.
	    """
	    streamable = ("svd", "slope_one")
	    if method not in streamable:
	        return jsonify({"error": f"Streaming not supported for {method}"}), 400

	    def ndjson(record):
	        # One serialised object per line.
	        return json_module.dumps(record) + "\n"

	    def generate():
	        try:
	            if method == "svd":
	                progress = cf_train.train_svd_generator()
	            else:
	                progress = cf_train.compute_slope_one_dev_generator()

	            # The factory may hand back None, in which case there is no
	            # progress to report.
	            if progress is not None:
	                for current, total in progress:
	                    yield ndjson({"type": "progress", "current": current, "total": total})

	            yield ndjson({"type": "phase", "label": "Evaluating users..."})
	            outcome = evaluator.evaluate_cf_method(method, cf_train, TEST, k=5)
	            yield ndjson({"type": "result", "data": outcome})
	        except Exception as exc:
	            logger.error(
	                "Stream evaluation error [%s]: %s\n%s",
	                method, exc, traceback.format_exc(),
	            )
	            yield ndjson({"type": "error", "message": "Evaluation failed"})

	    return Response(stream_with_context(generate()), mimetype="application/x-ndjson")


	def _precision_recall_for(recommender_fn, user_ids, label, k=5):
	    """Collect per-user precision@k and recall@k for one recommender.

	    Args:
	        recommender_fn: callable ``user_id -> [(product_id, score), ...]``.
	        user_ids: users to evaluate.
	        label: approach name, used only in log messages.
	        k: cut-off passed to the evaluator metrics.

	    Returns:
	        ``(precisions, recalls)``: two lists with one entry per user that
	        has relevant items according to the evaluator.
	    """
	    precisions, recalls = [], []
	    for uid in user_ids:
	        try:
	            recs = recommender_fn(uid)
	        except Exception:
	            # Still scored as "no recommendations", but the failure is now
	            # logged so a broken recommender does not go unnoticed.
	            logger.warning(
	                "%s recommender failed for user %s; scoring as empty",
	                label, uid, exc_info=True,
	            )
	            recs = []
	        rec_items = [r[0] for r in recs]
	        relevant = evaluator._get_relevant_for_user(uid)
	        if relevant:
	            precisions.append(evaluator.precision_at_k(rec_items, relevant, k))
	            recalls.append(evaluator.recall_at_k(rec_items, relevant, k))
	    return precisions, recalls


	@app.route("/api/evaluate/approaches")
	def api_evaluate_approaches():
	    """Compare the three approaches by mean Precision@5 / Recall@5.

	    Evaluates the first 20 distinct users of TEST with one representative
	    method per approach (item-based CF, TF-IDF, constraint-based), each
	    asked for 10 recommendations.

	    Returns:
	        JSON ``{"approaches": [...], "best_approach": name_or_null}`` where
	        the best approach is the one with the highest Precision@5; a 500
	        JSON error if the comparison itself fails.
	    """
	    try:
	        evaluator.set_test_ratings(TEST)
	        test_users = TEST["user_id"].unique()[:20]

	        def cf_recommender(uid):
	            return cf_train.recommend("item_based", uid, n_recommendations=10)

	        # Profiles are built only from ratings outside TEST so held-out
	        # items are not used as input.
	        train_ratings = ratings[~ratings.index.isin(TEST.index)]

	        def cb_recommender(uid):
	            profile = train_ratings[
	                (train_ratings["user_id"] == uid) & (train_ratings["rating"] >= 3.5)
	            ]["product_id"].tolist()
	            return cb.recommend("tfidf", user_profile_items=profile, n_recommendations=10)

	        def kb_recommender(uid):
	            prefs = get_user_preferences(users, uid)
	            constraints = {
	                "budget_min": prefs.get("budget_min", 0),
	                "budget_max": prefs.get("budget_max", 999999),
	                "category": list(prefs.get("preferred_categories", set())),
	                "brand": list(prefs.get("favorite_brands", set())),
	            }
	            return kb.recommend("constraint", constraints=constraints, n_recommendations=10)

	        contenders = (
	            ("Collaborative Filtering", cf_recommender),
	            ("Content-Based", cb_recommender),
	            ("Knowledge-Based", kb_recommender),
	        )

	        results = []
	        for label, recommender_fn in contenders:
	            precisions, recalls = _precision_recall_for(recommender_fn, test_users, label)
	            # An approach with no scorable user is left out of the response.
	            if precisions:
	                results.append({
	                    "approach": label,
	                    "Precision@5": round(float(np.mean(precisions)), 4),
	                    "Recall@5": round(float(np.mean(recalls)), 4),
	                })

	        best = max(results, key=lambda a: a.get("Precision@5", 0))["approach"] if results else None
	        return jsonify({"approaches": results, "best_approach": best})
	    except Exception as e:
	        logger.error("Approach evaluation error: %s\n%s", e, traceback.format_exc())
	        return jsonify({"error": "Approach comparison failed"}), 500


	@app.route("/api/products/filter")
	def api_products_filter():
	    """Return the products matching the given query-string filters.

	    Query parameters (all optional):
	        category, brand: exact match on the respective column.
	        price_min, price_max: inclusive price bounds; values that do not
	            parse as float are ignored.
	        q: case-insensitive substring of the product name.

	    Returns:
	        JSON ``{"total": <count>, "products": [...]}``.
	    """
	    cat = request.args.get("category")
	    brand = request.args.get("brand")
	    price_min = request.args.get("price_min", type=float)
	    price_max = request.args.get("price_max", type=float)
	    q = request.args.get("q", "").lower()

	    # Boolean indexing returns new frames, so no defensive copy is needed.
	    filtered = products
	    if cat:
	        filtered = filtered[filtered["category"] == cat]
	    if brand:
	        filtered = filtered[filtered["brand"] == brand]
	    if price_min is not None:
	        filtered = filtered[filtered["price"] >= price_min]
	    if price_max is not None:
	        filtered = filtered[filtered["price"] <= price_max]
	    if q:
	        # regex=False: the search text comes from the client and must be
	        # matched literally. As a regex, input such as "(" raises and an
	        # expensive pattern could stall the worker.
	        names = filtered["name"].str.lower()
	        filtered = filtered[names.str.contains(q, na=False, regex=False)]

	    results = [
	        get_product_info(row["product_id"])
	        for _, row in filtered.iterrows()
	    ]
	    return jsonify({
	        "total": len(results),
	        "products": results,
	    })


	@app.route("/api/user/<int:user_id>/preferences", methods=["PUT"])
	def api_update_preferences(user_id):
	    """Update a user's budget bounds in the in-memory users table.

	    Expects a JSON object that may contain ``budget_min`` and/or
	    ``budget_max``. Both values are validated before anything is written,
	    so an invalid request leaves the table untouched.

	    Returns:
	        JSON ``{"success": true, "preferences": {...}}`` on success; 404
	        when the user does not exist; 400 when the body is not a JSON
	        object or a budget value is not a finite number.
	    """
	    import math

	    user_idx = users[users["user_id"] == user_id].index
	    if user_idx.empty:
	        return jsonify({"error": "User not found"}), 404

	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request body must be a JSON object"}), 400

	    updates = {}
	    for field in ("budget_min", "budget_max"):
	        if field not in data:
	            continue
	        try:
	            value = float(data[field])
	        except (TypeError, ValueError):
	            return jsonify({"error": f"{field} must be a number"}), 400
	        if not math.isfinite(value):
	            return jsonify({"error": f"{field} must be a number"}), 400
	        updates[field] = value

	    for field, value in updates.items():
	        users.at[user_idx[0], field] = value

	    prefs = get_user_preferences(users, user_id)
	    # Keep the cached option list in step with the table.
	    for u in USER_OPTIONS:
	        if u["id"] == user_id:
	            u["budget_min"] = float(prefs.get("budget_min", 0))
	            u["budget_max"] = float(prefs.get("budget_max", 999999))
	            break
	    return jsonify({"success": True, "preferences": {
	        "budget_min": prefs.get("budget_min", 0),
	        "budget_max": prefs.get("budget_max", 0),
	        "name": prefs.get("name", ""),
	        "age": prefs.get("age", 0),
	    }})


	def _string_list(value):
	    """Normalise a JSON value to a list of strings.

	    ``None`` gives ``[]``; a string is split on commas (blank parts are
	    dropped); any other iterable has each element converted with ``str``.
	    Raises ``TypeError`` for non-iterable values.
	    """
	    if value is None:
	        return []
	    if isinstance(value, str):
	        return [part.strip() for part in value.split(",") if part.strip()]
	    return [str(item) for item in value]


	@app.route("/api/users", methods=["POST"])
	def api_create_user():
	    """Create a user in the in-memory users table.

	    Expects a JSON object; every field is optional: ``name`` (default
	    ``User_<id>``), ``age`` (25), ``categories`` and ``brands`` (empty),
	    ``budget_min`` (0) and ``budget_max`` (500).

	    Returns:
	        JSON ``{"success": true, "user_id": ..., "name": ...}``; 400 when
	        the body is not a JSON object or a field has an unusable type.
	    """
	    global users
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request body must be a JSON object"}), 400

	    try:
	        age = int(data.get("age", 25))
	        budget_min = float(data.get("budget_min", 0))
	        budget_max = float(data.get("budget_max", 500))
	        # A bare string would otherwise be joined character by character.
	        categories = _string_list(data.get("categories", []))
	        brands = _string_list(data.get("brands", []))
	    except (TypeError, ValueError):
	        return jsonify({"error": "Invalid user data"}), 400

	    # max() of an empty column is NaN, which int() cannot convert.
	    highest_id = users["user_id"].max()
	    new_id = 1 if pd.isna(highest_id) else int(highest_id) + 1
	    new_name = str(data.get("name") or f"User_{new_id}")

	    new_row = pd.DataFrame([{
	        "user_id": new_id,
	        "name": new_name,
	        "age": age,
	        "preferred_categories": ",".join(categories),
	        "favorite_brands": ",".join(brands),
	        "budget_min": budget_min,
	        "budget_max": budget_max,
	    }])
	    # NOTE(review): this rebinds the module-level ``users`` to a new frame.
	    # Objects constructed from the previous frame at import (e.g. the
	    # Explainer) keep the old one; confirm whether they need refreshing.
	    users = pd.concat([users, new_row], ignore_index=True)
	    USER_OPTIONS.append({
	        "id": new_id,
	        "name": new_name,
	        "age": age,
	        "categories": categories,
	        "brands": brands,
	        "budget_min": budget_min,
	        "budget_max": budget_max,
	    })
	    logger.info("Created user %s (ID %d)", new_name, new_id)
	    return jsonify({"success": True, "user_id": new_id, "name": new_name})


	def _int_or(value, default=None):
	    """Return ``int(value)``, or *default* when the conversion fails."""
	    try:
	        return int(value)
	    except (TypeError, ValueError):
	        return default


	def _empty_state(icon, message):
	    """Build the placeholder fragment shown instead of a product grid."""
	    return (
	        f'<div class="empty-state"><div class="empty-icon">{icon}</div>'
	        f'<p>{message}</p></div>'
	    )


	@app.route("/htmx/recommend", methods=["POST"])
	def htmx_recommend():
	    """Return recommendations for one user as an HTML fragment.

	    Accepts either a JSON object or form fields: ``user_id`` (required),
	    ``approach`` (default ``"cf"``), ``method`` (default ``"user_based"``)
	    and ``n`` (default 10).

	    Returns:
	        A ``product-grid`` fragment with one card per recommended product,
	        or an ``empty-state`` fragment when no user is selected, the
	        approach is unknown, the recommender fails or nothing is found.
	    """
	    from html import escape

	    # get_json(silent=True) returns None for form posts instead of raising.
	    # Values are coerced manually because a plain dict's .get() does not
	    # accept the ``type=`` keyword that the form MultiDict offers.
	    payload = request.get_json(silent=True)
	    if not isinstance(payload, dict):
	        payload = request.form

	    user_id = _int_or(payload.get("user_id"))
	    approach = payload.get("approach", "cf")
	    method = payload.get("method", "user_based")
	    n_recs = _int_or(payload.get("n", 10), 10)

	    if not user_id:
	        return _empty_state("⚠️", "Please select a user first.")

	    try:
	        user_rated = get_user_rated_items(user_id)
	        prefs = get_user_preferences(users, user_id)

	        if approach == "cf":
	            recs = cf.recommend(method, user_id, n_recommendations=n_recs)
	        elif approach == "content":
	            recs = cb.recommend(method, user_profile_items=user_rated, preferences=prefs, n_recommendations=n_recs)
	        elif approach == "knowledge":
	            constraints = {
	                "budget_min": prefs.get("budget_min", 0),
	                "budget_max": prefs.get("budget_max", 999999),
	                "category": list(prefs.get("preferred_categories", set())),
	                "brand": list(prefs.get("favorite_brands", set())),
	            }
	            recs = kb.recommend(method, constraints=constraints, preferences=prefs, n_recommendations=n_recs)
	        else:
	            # The approach name is client input and is echoed into markup,
	            # so it must be escaped.
	            return _empty_state("❌", f"Unknown approach: {escape(str(approach))}")
	    except Exception as e:
	        logger.error("HTMX recommend error: %s\n%s", e, traceback.format_exc())
	        return _empty_state("❌", "An error occurred")

	    if not recs:
	        return _empty_state("📭", "No recommendations found.")

	    explanation = "Recommended based on your preferences."
	    cards = []
	    for pid, _score in recs:
	        product = get_product_info(pid)
	        if not product:
	            continue
	        # Catalogue text is escaped before it is placed into the markup.
	        name = escape(str(product["name"]))
	        brand = escape(str(product["brand"]))
	        subcategory = escape(str(product["subcategory"]))
	        cards.append(
	            '<div class="product-card">'
	            f'<div class="product-icon">{get_category_icon(product["category"])}</div>'
	            f'<div class="product-name">{name}</div>'
	            f'<div class="product-meta">{brand} · {subcategory}</div>'
	            '<div class="compact-row">'
	            f'<div class="product-price">${product["price"]:.2f}</div>'
	            f'<div class="product-rating">{stars_html(product["avg_rating"])} {product["avg_rating"]}</div>'
	            '</div>'
	            f'<div class="product-explanation">{explanation}</div>'
	            '</div>'
	        )
	    return '<div class="product-grid">' + "".join(cards) + '</div>'


	def get_category_icon(category):
	    """Return the emoji for *category*, or the generic box for unknown ones."""
	    known = dict([
	        ("Electronics", "💻"),
	        ("Clothing", "👕"),
	        ("Home & Kitchen", "🏠"),
	        ("Books", "📚"),
	        ("Sports", "⚽"),
	        ("Beauty", "💄"),
	        ("Toys", "🧸"),
	        ("Automotive", "🚗"),
	    ])
	    if category in known:
	        return known[category]
	    return "📦"


	def stars_html(rating):
	    """Render *rating* as exactly five star glyphs.

	    The rating is truncated to a whole number and clamped to 0..5, so the
	    result always has five characters. Values that cannot be converted to
	    an integer (``None``, NaN, infinity, non-numeric text) render as zero
	    filled stars instead of raising.
	    """
	    try:
	        filled = int(rating)
	    except (TypeError, ValueError, OverflowError):
	        # int() raises ValueError for NaN and OverflowError for infinity.
	        filled = 0
	    filled = max(0, min(5, filled))
	    return "★" * filled + "☆" * (5 - filled)


	# Direct execution only: start Flask's built-in server on all interfaces.
	if __name__ == "__main__":
	    # PORT defaults to 7860; FLASK_DEBUG=1 turns on debug mode.
	    env = os.environ
	    debug_mode = env.get("FLASK_DEBUG", "0") == "1"
	    port = int(env.get("PORT", 7860))
	    logger.info("Starting TasteEngine on port %d (debug=%s)", port, debug_mode)
	    app.run(host="0.0.0.0", port=port, debug=debug_mode)