# finalcapstone / recommend_app.py
# Eric Hierholzer
# updates
# 069d6cd
import os
import logging
from flask import Flask, send_from_directory, request, jsonify
from recommendation_engine import load_and_preprocess_data, build_or_load_model, get_recommendations
from collections import Counter
import re
def normalize_title(title):
    """Return the canonical lookup form of ``title``.

    The steps run in this order:

    1. lowercase and strip leading/trailing whitespace;
    2. delete every character that is not a word character, whitespace
       or ``&``;
    3. collapse each run of whitespace into a single space;
    4. spell ``&`` out as ``and``.

    Stripping happens before punctuation is removed, so a space that
    was next to leading/trailing punctuation survives
    (``"Wait !"`` becomes ``"wait "``).
    """
    lowered = title.lower().strip()
    # '&' is deliberately kept at this stage so it can be turned into "and" below.
    without_punctuation = re.sub(r"[^\w\s&]", "", lowered)
    single_spaced = re.sub(r"\s+", " ", without_punctuation)
    return single_spaced.replace("&", "and")
# Root logging is configured exactly once, at import time, before the
# module-level logger below is used.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Flask application object; static assets are served from ./static.
app = Flask(__name__, static_folder='static')
# Eagerly load the dataset and the similarity model when the module is
# imported, so every request handler can rely on `df`, `cosine_sim_matrix`
# and `title_to_index` being present.  Any failure is logged and re-raised,
# which aborts the import instead of serving a half-initialised app.
# NOTE(review): the ".pkl" cache lives under /tmp; if it is read back with
# pickle, confirm nothing untrusted can write to that path.
try:
    df = load_and_preprocess_data("netflix_titles.csv")
    # The first element of the returned triple is not used by this module.
    _, cosine_sim_matrix, title_to_index = build_or_load_model(
        df,
        "/tmp/cosine_sim_cache.pkl",
    )
    logger.info("Application started successfully with data and model loaded.")
except Exception as startup_error:
    logger.error(f"Startup failed: {str(startup_error)}")
    raise
@app.route('/')
def serve_frontend():
    """Respond to ``/`` with ``index.html`` taken from the app's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')
def _parse_int_arg(name, default, minimum):
    """Read integer query parameter ``name`` from the current request.

    Returns ``default`` when the parameter is absent.  Raises
    ``ValueError`` with a client-presentable message when the value is
    not an integer or is smaller than ``minimum``.
    """
    raw = request.args.get(name)
    if raw is None:
        return default
    try:
        value = int(raw)
    except ValueError:
        raise ValueError(f"'{name}' must be an integer") from None
    if value < minimum:
        raise ValueError(f"'{name}' must be at least {minimum}")
    return value


@app.route('/recommend', methods=['GET'])
def recommend():
    """Return titles similar to the one named in the ``title`` query parameter.

    Query parameters:
        title:  title to look up (required); normalised with
                ``normalize_title`` before the lookup.
        limit:  page size, integer >= 1 (default 10).
        offset: number of leading results to skip, integer >= 0 (default 0).
        type:   optional value forwarded to ``get_recommendations`` as
                ``content_type``.
        fields: repeatable; forwarded as ``fields`` (``None`` when absent).

    Responses (all JSON, always containing ``message`` and
    ``recommendations``):
        200 -- the requested page plus ``total``.
        400 -- missing title, or invalid ``limit`` / ``offset``.
        404 -- unknown title, or no recommendations produced.
        500 -- unexpected failure while generating recommendations.
    """
    raw_title = request.args.get("title", "").strip()
    title = normalize_title(raw_title)  # Normalize once
    content_type = request.args.get("type", None)
    fields = request.args.getlist("fields")

    logger.info("Received API request: %s", request.url)
    logger.info("Raw title received: '%s'", raw_title)
    logger.info("Normalized title used for lookup: '%s'", title)

    if not title:
        return jsonify({"message": "Title required", "recommendations": []}), 400

    # Reject malformed paging values up front with a 400 instead of letting
    # int() raise outside any handler (which surfaced as a 500), and refuse
    # negative values that would turn the slice below into nonsense.
    try:
        limit = _parse_int_arg("limit", 10, 1)
        offset = _parse_int_arg("offset", 0, 0)
    except ValueError as exc:
        return jsonify({"message": str(exc), "recommendations": []}), 400

    if title not in title_to_index or title_to_index[title] is None:
        logger.error(f"Title '{title}' is missing from title_to_index or maps to None!")
        return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

    try:
        # Ask for enough results to cover the requested page, then slice.
        recs = get_recommendations(
            title, df, title_to_index, cosine_sim_matrix,
            top_n=limit + offset, content_type=content_type, fields=fields or None
        )
        if not recs:
            logger.warning(f"No recommendations found for '{title}'")
            return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404
        # NOTE(review): `total` is the number of results fetched for this
        # request (presumably capped by top_n), not a catalogue-wide count
        # -- confirm that is what the frontend expects.
        return jsonify({
            "message": "Similar Movies",
            "recommendations": recs[offset:offset + limit],
            "total": len(recs)
        })
    except Exception as e:
        # logger.exception keeps the traceback, which str(e) alone loses.
        logger.exception("Error generating recommendations for '%s'", title)
        # NOTE(review): the exception text is echoed to the client as before;
        # consider a generic message if internal details must not leak.
        return jsonify({"message": f"Server error: {str(e)}", "recommendations": []}), 500
@app.route('/search', methods=['GET'])
def search_titles():
    """Return up to 10 titles that contain the ``q`` query text.

    The query is stripped and lowercased and compared against the
    lowercased ``title`` column as a plain substring.  An empty query
    yields an empty JSON list.  Any unexpected failure is logged with
    its traceback and answered with an empty list and status 500.
    """
    query = request.args.get("q", "").strip().lower()
    if not query:
        return jsonify([])
    try:
        # regex=False: the text comes straight from the user, so it must be
        # matched literally.  With the default regex matching, input such as
        # "(" raised re.error (-> 500) and characters like "." or "*"
        # silently changed what was matched.
        matches = df['title'].str.lower().str.contains(query, na=False, regex=False)
        suggestions = df[matches]['title'].head(10).tolist()
        return jsonify(suggestions)
    except Exception:
        logger.exception("Error in search for '%s'", query)
        return jsonify([]), 500
@app.route('/visualizations', methods=['GET'])
def get_visualizations():
    """Return chart-ready genre, type and country counts as JSON.

    The payload holds ``message`` plus three sections
    (``genre_distribution``, ``type_distribution``, ``top_countries``),
    each carrying ``data`` (label -> count), ``title``, ``x_label`` and
    ``y_label``.  On any failure a JSON error message is returned with
    status 500.
    """

    def _section(counts, heading, x_axis):
        # Every section has the same shape; an empty series becomes {}.
        return {
            "data": {} if counts.empty else counts.to_dict(),
            "title": heading,
            "x_label": x_axis,
            "y_label": "Count"
        }

    try:
        # Multi-valued columns are split on ', ' and exploded so each
        # individual value is counted on its own.
        top_genres = df['listed_in'].str.split(', ').explode().value_counts().head(10)
        per_type = df['type'].value_counts()
        top_countries = df['country'].str.split(', ').explode().value_counts().head(5)

        payload = {
            "message": "Success",
            "genre_distribution": _section(
                top_genres, "Top 10 Most Common Genres", "Genre"
            ),
            "type_distribution": _section(
                per_type, "Distribution of Movies vs. TV Shows", "Type"
            ),
            "top_countries": _section(
                top_countries, "Top 5 Countries Producing Content", "Country"
            )
        }
        logger.info("Generated visualization data successfully.")
        return jsonify(payload)
    except Exception as err:
        logger.error(f"Visualization error: {str(err)}")
        return jsonify({"message": f"Error generating visualizations: {str(err)}"}), 500
# Script entry point.  The Flask development server was used here earlier
# (app.run(debug=False, host="0.0.0.0", port=7860)); direct execution now
# delegates to gunicorn's WSGI launcher instead.
if __name__ == "__main__":
    # Imported lazily so gunicorn is only needed when run as a script.
    # NOTE(review): presumably the launcher takes the app module and its
    # options from the command line -- confirm how this script is invoked.
    from gunicorn.app import wsgiapp
    wsgiapp.run()