Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| from flask import Flask, send_from_directory, request, jsonify | |
| from recommendation_engine import load_and_preprocess_data, build_or_load_model, get_recommendations | |
| from collections import Counter | |
| import re | |
def normalize_title(title):
    """Return the canonical lookup form of *title*.

    The title is lowercased and stripped of surrounding whitespace, every
    character that is not a word character, whitespace or '&' is removed,
    runs of whitespace are collapsed to one space, and '&' is finally
    spelled out as 'and'.
    """
    lowered = title.lower().strip()
    # '&' survives this pass so it can be turned into 'and' at the end.
    without_punctuation = re.sub(r"[^\w\s&]", "", lowered)
    single_spaced = re.sub(r"\s+", " ", without_punctuation)
    return single_spaced.replace("&", "and")
# Set up root logging a single time for the whole module: INFO and above,
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger shared by the code below.
logger = logging.getLogger(__name__)
# The Flask application object; its static folder is 'static'.
app = Flask(__name__, static_folder='static')

# Load the dataset and build (or load) the similarity model once, at import
# time, so the module-level names df, cosine_sim_matrix and title_to_index are
# available to the view functions. The second argument is presumably the path
# of an on-disk cache for the model -- confirm in recommendation_engine.
# Any failure is logged and re-raised, aborting startup.
try:
    df = load_and_preprocess_data("netflix_titles.csv")
    _, cosine_sim_matrix, title_to_index = build_or_load_model(
        df, "/tmp/cosine_sim_cache.pkl"
    )
except Exception as exc:
    logger.error(f"Startup failed: {str(exc)}")
    raise
else:
    logger.info("Application started successfully with data and model loaded.")
def serve_frontend():
    """Send ``index.html`` from the application's static folder.

    NOTE(review): no route decorator is visible on this view in the reviewed
    source; confirm how it is registered with ``app``.
    """
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')
def recommend():
    """Return recommendations for the title given in the query string.

    Query parameters (read from ``request.args``):
        title:  Title to look up; normalized with ``normalize_title`` before
                it is checked against ``title_to_index``.
        limit:  Maximum number of items to return (default 10, must be >= 0).
        offset: Number of leading items to skip (default 0, must be >= 0).
        type:   Optional value forwarded to ``get_recommendations`` as
                ``content_type``.
        fields: Optional repeated parameter forwarded as ``fields``
                (``None`` when absent).

    Returns:
        A JSON body with ``message`` and ``recommendations`` keys and one of:
        * 200 -- the requested slice, plus ``total`` (the number of items
          ``get_recommendations`` returned before slicing);
        * 400 -- missing title, or ``limit``/``offset`` not a non-negative
          integer;
        * 404 -- title unknown, or no recommendations produced;
        * 500 -- unexpected error while generating recommendations.

    NOTE(review): no route decorator is visible on this view in the reviewed
    source; confirm how it is registered with ``app``.
    """
    raw_title = request.args.get("title", "").strip()
    title = normalize_title(raw_title)  # Normalize once, reuse everywhere below.

    logger.info("Received API request: %s", request.url)
    logger.info("Raw title received: '%s'", raw_title)
    logger.info("Normalized title used for lookup: '%s'", title)

    if not title:
        return jsonify({"message": "Title required", "recommendations": []}), 400

    # Paging values come straight from the client: reject malformed or
    # negative numbers with a 400 instead of letting int() raise (which
    # surfaced as an unhandled 500) or producing meaningless slices.
    try:
        limit = int(request.args.get("limit", 10))
        offset = int(request.args.get("offset", 0))
    except ValueError:
        return jsonify({
            "message": "'limit' and 'offset' must be integers",
            "recommendations": []
        }), 400
    if limit < 0 or offset < 0:
        return jsonify({
            "message": "'limit' and 'offset' must not be negative",
            "recommendations": []
        }), 400

    content_type = request.args.get("type", None)
    fields = request.args.getlist("fields")

    if title not in title_to_index or title_to_index[title] is None:
        logger.error(f"Title '{title}' is missing from title_to_index or maps to None!")
        # A sample of known keys helps diagnose normalization mismatches;
        # only materialize it when debug logging is actually enabled.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "Sample titles available in title_to_index: %s",
                list(title_to_index.keys())[:20],
            )
        return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

    try:
        # Ask for offset + limit items so the requested page can be sliced out.
        recs = get_recommendations(
            title, df, title_to_index, cosine_sim_matrix,
            top_n=limit + offset, content_type=content_type, fields=fields or None
        )
        if not recs:
            logger.warning(f"No recommendations found for '{title}'")
            return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404
        return jsonify({
            "message": "Similar Movies",
            "recommendations": recs[offset:offset + limit],
            "total": len(recs)
        })
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error generating recommendations for '{title}': {str(e)}")
        return jsonify({"message": f"Server error: {str(e)}", "recommendations": []}), 500
def search_titles():
    """Return title suggestions for the ``q`` query parameter.

    The query is stripped and lowercased, then matched as a literal,
    case-insensitive substring against the ``title`` column of ``df``.

    Returns:
        A JSON list of at most 10 matching titles; an empty list when ``q``
        is missing or blank; an empty list with status 500 if the lookup
        fails.

    NOTE(review): no route decorator is visible on this view in the reviewed
    source; confirm how it is registered with ``app``.
    """
    query = request.args.get("q", "").strip().lower()
    if not query:
        return jsonify([])
    try:
        # regex=False: the query is user input and must be matched literally.
        # With the default (regex=True), input such as "(" or "c++" raises
        # re.error and metacharacters like "." match unintended titles.
        matches = df['title'].str.lower().str.contains(query, na=False, regex=False)
        suggestions = df.loc[matches, 'title'].head(10).tolist()
        return jsonify(suggestions)
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error in search for '{query}': {str(e)}")
        return jsonify([]), 500
def get_visualizations():
    """Return chart-ready distribution data computed from ``df``.

    The JSON payload holds three entries -- ``genre_distribution`` (10 most
    frequent values of ``listed_in``), ``type_distribution`` (counts of
    ``type``) and ``top_countries`` (5 most frequent values of ``country``).
    Each entry carries ``data`` plus the ``title``, ``x_label`` and
    ``y_label`` strings. On any exception a JSON error message is
    returned with status 500.

    NOTE(review): no route decorator is visible on this view in the reviewed
    source; confirm how it is registered with ``app``.
    """
    def _chart(counts, title, x_label, y_label):
        # An empty result is reported as an empty mapping so the key always exists.
        data = {} if counts.empty else counts.to_dict()
        return {
            "data": data,
            "title": title,
            "x_label": x_label,
            "y_label": y_label
        }

    try:
        # 'listed_in' and 'country' hold ', '-separated lists: split them and
        # count each element on its own.
        top_genres = df['listed_in'].str.split(', ').explode().value_counts().head(10)
        per_type = df['type'].value_counts()
        top_countries = df['country'].str.split(', ').explode().value_counts().head(5)

        payload = {
            "message": "Success",
            "genre_distribution": _chart(
                top_genres, "Top 10 Most Common Genres", "Genre", "Count"
            ),
            "type_distribution": _chart(
                per_type, "Distribution of Movies vs. TV Shows", "Type", "Count"
            ),
            "top_countries": _chart(
                top_countries, "Top 5 Countries Producing Content", "Country", "Count"
            )
        }
        logger.info("Generated visualization data successfully.")
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Visualization error: {str(exc)}")
        return jsonify({"message": f"Error generating visualizations: {str(exc)}"}), 500
# Script entry point: hand control to gunicorn's WSGI runner.
# An earlier variant (kept here for reference) started Flask's built-in
# server instead: app.run(debug=False, host="0.0.0.0", port=7860)
if __name__ == "__main__":
    import gunicorn.app.wsgiapp as gunicorn_wsgiapp

    gunicorn_wsgiapp.run()