Spaces:

erichier
/

finalcapstone

Sleeping

File size: 6,108 Bytes

import os
import logging
from flask import Flask, send_from_directory, request, jsonify
from recommendation_engine import load_and_preprocess_data, build_or_load_model, get_recommendations
from collections import Counter
import re

def normalize_title(title):
    """Return a canonical lookup form of ``title``.

    The steps run in this order: lowercase and trim both ends, drop every
    character that is not a word character, whitespace, or '&', squeeze each
    whitespace run down to a single space, and finally spell '&' out as 'and'.
    """
    lowered = title.lower().strip()
    # '&' survives this pass so it can be rewritten as a word in the last step.
    cleaned = re.sub(r"[^\w\s&]", "", lowered)
    single_spaced = re.sub(r"\s+", " ", cleaned)
    return single_spaced.replace("&", "and")

# Configure logging once
# basicConfig sets the INFO level and a "timestamp - level - message" line
# format for log records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# Flask application object; its static folder is 'static'.
app = Flask(__name__, static_folder='static')

# Load data and model at startup
# The module-level names df, cosine_sim_matrix and title_to_index are read by
# the route handlers defined below. The first value returned by
# build_or_load_model is discarded.
# NOTE(review): the second argument looks like a cache file path used by
# build_or_load_model — confirm in recommendation_engine.
try:
    df = load_and_preprocess_data("netflix_titles.csv")
    _, cosine_sim_matrix, title_to_index = build_or_load_model(df, "/tmp/cosine_sim_cache.pkl")
    logger.info("Application started successfully with data and model loaded.")
except Exception as e:
    # Log the failure, then re-raise so the module does not finish loading
    # without its data and model.
    logger.error(f"Startup failed: {str(e)}")
    raise

@app.route('/')
def serve_frontend():
    """Return ``index.html`` from the app's static folder for the root URL."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')

@app.route('/recommend', methods=['GET'])
def recommend():
    """Return titles similar to the one named in the query string.

    Query parameters:
        title:  Title to look up (required). It is normalized with
                ``normalize_title`` before the lookup in ``title_to_index``.
        limit:  Page size, a non-negative integer (default 10).
        offset: Number of leading results to skip, a non-negative integer
                (default 0).
        type:   Optional; forwarded to ``get_recommendations`` as
                ``content_type``.
        fields: Optional and repeatable; forwarded to ``get_recommendations``
                as ``fields`` (``None`` when absent).

    Returns:
        A JSON response and, for errors, a status code:
        200 with ``message``, the requested page of ``recommendations`` and
        ``total`` (number of results before paging);
        400 when the title is missing or ``limit``/``offset`` is invalid;
        404 when the title is unknown or yields no recommendations;
        500 when generating recommendations raises.
    """
    raw_title = request.args.get("title", "").strip()
    title = normalize_title(raw_title)  # Normalize once

    logger.info(f"Received API request: {request.url}")
    logger.info(f"Raw title received: '{raw_title}'")
    logger.info(f"Normalized title used for lookup: '{title}'")

    if not title:
        return jsonify({"message": "Title required", "recommendations": []}), 400

    # Malformed paging values are a client error: answer 400 instead of
    # letting int() raise and surface as an unhandled 500.
    try:
        limit = int(request.args.get("limit", 10))
        offset = int(request.args.get("offset", 0))
    except ValueError:
        return jsonify({
            "message": "'limit' and 'offset' must be integers",
            "recommendations": []
        }), 400

    # Negative values would yield a negative top_n and a meaningless slice.
    if limit < 0 or offset < 0:
        return jsonify({
            "message": "'limit' and 'offset' must be non-negative",
            "recommendations": []
        }), 400

    content_type = request.args.get("type", None)
    fields = request.args.getlist("fields")

    if title not in title_to_index or title_to_index[title] is None:
        logger.error(f"Title '{title}' is missing from title_to_index or maps to None!")
        return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

    try:
        # Ask for enough results to cover the requested page, then slice.
        recs = get_recommendations(
            title, df, title_to_index, cosine_sim_matrix,
            top_n=limit + offset, content_type=content_type, fields=fields or None
        )
        if not recs:
            logger.warning(f"No recommendations found for '{title}'")
            return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

        return jsonify({
            "message": "Similar Movies",
            "recommendations": recs[offset:offset + limit],
            "total": len(recs)
        })
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"Error generating recommendations for '{title}': {str(e)}")
        return jsonify({"message": f"Server error: {str(e)}", "recommendations": []}), 500


@app.route('/search', methods=['GET'])
def search_titles():
    """Return title suggestions based on a query.

    Reads the ``q`` query parameter, trims and lowercases it, and returns a
    JSON list of at most 10 values from ``df['title']`` whose lowercased form
    contains the query as a literal substring. An empty query returns an empty
    list. If the lookup raises, an empty list is returned with status 500.
    """
    query = request.args.get("q", "").strip().lower()
    if not query:
        return jsonify([])

    try:
        # regex=False: the query is user input and must be matched literally.
        # With the default regex matching, input such as "(" or "c++" raises
        # a pattern error and "." matches every title.
        matches = df['title'].str.lower().str.contains(query, na=False, regex=False)
        filtered = df[matches]
        suggestions = filtered['title'].head(10).tolist()
        return jsonify(suggestions)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception(f"Error in search for '{query}': {str(e)}")
        return jsonify([]), 500

@app.route('/visualizations', methods=['GET'])
def get_visualizations():
    """Return chart-ready distributions of genres, content types and countries.

    The JSON body carries a ``message`` plus three sections
    (``genre_distribution``, ``type_distribution``, ``top_countries``), each
    holding the counts under ``data`` together with a title and axis labels.
    Any exception is logged and reported as a 500 with an error message.
    """
    def build_section(counts, title, x_label, y_label):
        # An empty Series maps to an empty dict so "data" is always present.
        return {
            "data": {} if counts.empty else counts.to_dict(),
            "title": title,
            "x_label": x_label,
            "y_label": y_label
        }

    try:
        # Multi-valued cells are split on ', ' and counted one entry at a time.
        genres = df['listed_in'].str.split(', ').explode()
        genre_counts = genres.value_counts().head(10)

        type_counts = df['type'].value_counts()

        countries = df['country'].str.split(', ').explode()
        country_counts = countries.value_counts().head(5)

        payload = {
            "message": "Success",
            "genre_distribution": build_section(
                genre_counts, "Top 10 Most Common Genres", "Genre", "Count"
            ),
            "type_distribution": build_section(
                type_counts, "Distribution of Movies vs. TV Shows", "Type", "Count"
            ),
            "top_countries": build_section(
                country_counts, "Top 5 Countries Producing Content", "Country", "Count"
            )
        }

        logger.info("Generated visualization data successfully.")
        return jsonify(payload)

    except Exception as e:
        logger.error(f"Visualization error: {str(e)}")
        return jsonify({"message": f"Error generating visualizations: {str(e)}"}), 500

# if __name__ == "__main__":
#     app.run(debug=False, host="0.0.0.0", port=7860)

if __name__ == "__main__":
    # Imported here rather than at the top of the file, so gunicorn is only
    # needed when this file is executed directly as a script.
    from gunicorn.app.wsgiapp import run
    # NOTE(review): run() is called with no arguments; presumably gunicorn
    # takes the target app and bind address from the command line or its
    # config — confirm how this script is launched.
    run()