"""Flask application exposing recommendation, search and visualization endpoints."""

import os
import logging
import re
from collections import Counter

from flask import Flask, send_from_directory, request, jsonify

from recommendation_engine import (
    load_and_preprocess_data,
    build_or_load_model,
    get_recommendations,
)


def normalize_title(title):
    """Return a canonical form of *title* for dictionary lookups.

    The title is lowercased and stripped, every character that is not a word
    character, whitespace or '&' is removed, runs of whitespace collapse to a
    single space, and '&' is finally spelled out as 'and'.
    """
    title = title.lower().strip()
    title = re.sub(r"[^\w\s&]", "", title)  # keep '&' so it can be spelled out below
    title = re.sub(r"\s+", " ", title)
    return title.replace("&", "and")


# Configure logging once, at import time.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

app = Flask(__name__, static_folder='static')

# Load data and model at startup; the app is useless without them, so any
# failure is logged and re-raised to abort the import.
# NOTE(review): the cache lives in /tmp and has a .pkl suffix; presumably
# build_or_load_model unpickles it. /tmp is world-writable on most systems,
# so confirm the cache cannot be replaced by an untrusted user.
try:
    df = load_and_preprocess_data("netflix_titles.csv")
    _, cosine_sim_matrix, title_to_index = build_or_load_model(
        df, "/tmp/cosine_sim_cache.pkl"
    )
    logger.info("Application started successfully with data and model loaded.")
except Exception as e:
    logger.exception(f"Startup failed: {str(e)}")
    raise


def _non_negative_int_arg(name, default):
    """Read query parameter *name* as an int >= 0, or return *default* if absent.

    Raises:
        ValueError: if the parameter is present but not an integer, or negative.
    """
    raw = request.args.get(name)
    if raw is None:
        return default
    value = int(raw)  # raises ValueError on non-numeric input
    if value < 0:
        raise ValueError(f"'{name}' must be non-negative")
    return value


def _distribution(counts, title, x_label, y_label):
    """Build one chart section of the /visualizations response from a value-count series."""
    return {
        "data": counts.to_dict() if not counts.empty else {},
        "title": title,
        "x_label": x_label,
        "y_label": y_label,
    }


@app.route('/')
def serve_frontend():
    """Serve the frontend index.html from the static folder."""
    return send_from_directory(app.static_folder, 'index.html')


@app.route('/recommend', methods=['GET'])
def recommend():
    """Return recommendations for the title given in the query string.

    Query parameters:
        title:  title to look up (required); normalized before lookup.
        limit:  page size, non-negative integer (default 10).
        offset: number of leading results to skip, non-negative integer (default 0).
        type:   optional value forwarded to get_recommendations as content_type.
        fields: optional repeated parameter forwarded as the fields list.

    Responses:
        200 with "recommendations" (the requested page) and "total" (the number
            of results fetched, i.e. at most limit + offset).
        400 if the title is empty or limit/offset are invalid.
        404 if the title is unknown or no recommendations were produced.
        500 if generating recommendations raised.
    """
    raw_title = request.args.get("title", "").strip()
    title = normalize_title(raw_title)  # normalize once, reuse everywhere below
    content_type = request.args.get("type", None)
    fields = request.args.getlist("fields")

    logger.info(f"Received API request: {request.url}")
    logger.info(f"Raw title received: '{raw_title}'")
    logger.info(f"Normalized title used for lookup: '{title}'")

    # Bad paging input is a client error, not a server crash.
    try:
        limit = _non_negative_int_arg("limit", 10)
        offset = _non_negative_int_arg("offset", 0)
    except ValueError:
        return jsonify({
            "message": "'limit' and 'offset' must be non-negative integers",
            "recommendations": [],
        }), 400

    if not title:
        return jsonify({"message": "Title required", "recommendations": []}), 400

    logger.debug(
        "Checking if '%s' exists in title_to_index: %s", title, title in title_to_index
    )
    if title not in title_to_index or title_to_index[title] is None:
        logger.error(f"Title '{title}' is missing from title_to_index or maps to None!")
        return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

    try:
        # Fetch enough results to cover the requested page, then slice it out.
        recs = get_recommendations(
            title,
            df,
            title_to_index,
            cosine_sim_matrix,
            top_n=limit + offset,
            content_type=content_type,
            fields=fields or None,
        )
        if not recs:
            logger.warning(f"No recommendations found for '{title}'")
            return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

        return jsonify({
            "message": "Similar Movies",
            "recommendations": recs[offset:offset + limit],
            "total": len(recs),
        })
    except Exception as e:
        logger.exception(f"Error generating recommendations for '{title}': {str(e)}")
        # NOTE(review): the exception text is echoed to the client; confirm this
        # is acceptable for the deployment, since it may expose internals.
        return jsonify({"message": f"Server error: {str(e)}", "recommendations": []}), 500


@app.route('/search', methods=['GET'])
def search_titles():
    """Return up to 10 titles containing the query string ``q`` (case-insensitive).

    An empty query yields an empty list. The query is matched as a literal
    substring, so characters such as '(' or '+' in user input are safe.
    """
    query = request.args.get("q", "").strip().lower()
    if not query:
        return jsonify([])

    try:
        # regex=False: user input must not be interpreted as a regular expression.
        matches = df['title'].str.lower().str.contains(query, na=False, regex=False)
        suggestions = df.loc[matches, 'title'].head(10).tolist()
        return jsonify(suggestions)
    except Exception as e:
        logger.exception(f"Error in search for '{query}': {str(e)}")
        return jsonify([]), 500


@app.route('/visualizations', methods=['GET'])
def get_visualizations():
    """Return structured data for visualizations (genre, type, country distributions)."""
    try:
        # Multi-valued columns are split on ', ' so each genre/country is counted separately.
        genre_counts = df['listed_in'].str.split(', ').explode().value_counts().head(10)
        type_counts = df['type'].value_counts()
        country_counts = df['country'].str.split(', ').explode().value_counts().head(5)

        # Every section is always present (possibly with empty data) and
        # carries its own labels for the frontend.
        response = {
            "message": "Success",
            "genre_distribution": _distribution(
                genre_counts, "Top 10 Most Common Genres", "Genre", "Count"
            ),
            "type_distribution": _distribution(
                type_counts, "Distribution of Movies vs. TV Shows", "Type", "Count"
            ),
            "top_countries": _distribution(
                country_counts, "Top 5 Countries Producing Content", "Country", "Count"
            ),
        }

        logger.info("Generated visualization data successfully.")
        return jsonify(response)
    except Exception as e:
        logger.exception(f"Visualization error: {str(e)}")
        return jsonify({"message": f"Error generating visualizations: {str(e)}"}), 500


# Alternative entry point using Flask's built-in server:
#     app.run(debug=False, host="0.0.0.0", port=7860)
if __name__ == "__main__":
    from gunicorn.app.wsgiapp import run

    run()