File size: 6,108 Bytes
0a2f730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
acbf545
0a2f730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
069d6cd
0a2f730
 
 
 
 
 
 
 
 
 
 
069d6cd
0a2f730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53a369d
0a2f730
53a369d
 
 
 
 
 
 
0a2f730
53a369d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a2f730
 
 
 
fa2d998
 
 
0a2f730
fa2d998
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os
import logging
from flask import Flask, send_from_directory, request, jsonify
from recommendation_engine import load_and_preprocess_data, build_or_load_model, get_recommendations
from collections import Counter
import re

def normalize_title(title):
    """Return a canonical lookup form of *title*.

    The steps run in this order: lowercase and trim the ends, drop every
    character that is not a word character, whitespace or '&', collapse
    whitespace runs into a single space, and finally spell '&' out as 'and'.
    """
    lowered = title.lower().strip()
    # '&' is deliberately kept by this pattern so it can be spelled out below.
    without_punctuation = re.sub(r"[^\w\s&]", "", lowered)
    single_spaced = re.sub(r"\s+", " ", without_punctuation)
    return single_spaced.replace("&", "and")

# Configure logging once, at import time: INFO level with timestamped,
# level-tagged records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
# Module-level logger shared by every route handler in this file.
logger = logging.getLogger(__name__)

# Flask application object; its static folder is 'static', which is also
# where serve_frontend() looks for index.html.
app = Flask(__name__, static_folder='static')

# Load data and model at startup (import time) so that all request handlers
# share the same module-level df, cosine_sim_matrix and title_to_index.
try:
    df = load_and_preprocess_data("netflix_titles.csv")
    # The first element returned by build_or_load_model is not used in this module.
    # NOTE(review): the second argument looks like an on-disk cache path for the
    # similarity matrix — confirm against recommendation_engine.
    _, cosine_sim_matrix, title_to_index = build_or_load_model(df, "/tmp/cosine_sim_cache.pkl")
    logger.info("Application started successfully with data and model loaded.")
except Exception as e:
    # Log, then re-raise so the import fails instead of leaving the handlers
    # without the data they depend on.
    logger.error(f"Startup failed: {str(e)}")
    raise

@app.route('/')
def serve_frontend():
    """Return index.html from the application's static folder for the root URL."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')

def _parse_non_negative_int(raw_value, default, name):
    """Return *raw_value* as a non-negative int, or *default* when it is absent.

    Args:
        raw_value: The raw query-string value, or None if the parameter was
            not supplied.
        default: Value to return when *raw_value* is None.
        name: Parameter name, used only in the error message.

    Raises:
        ValueError: If the value is not an integer or is negative. The message
            names the offending parameter and contains no internal details.
    """
    if raw_value is None:
        return default
    try:
        parsed = int(raw_value)
    except (TypeError, ValueError):
        raise ValueError(f"'{name}' must be an integer") from None
    if parsed < 0:
        raise ValueError(f"'{name}' must not be negative")
    return parsed


@app.route('/recommend', methods=['GET'])
def recommend():
    """Return recommendations similar to the requested title.

    Query parameters:
        title:  Title to look up (required). It is normalized with
                normalize_title() before the lookup in title_to_index.
        limit:  Maximum number of recommendations to return (default 10).
        offset: Number of leading recommendations to skip (default 0).
        type:   Optional content-type filter, passed through to
                get_recommendations().
        fields: Optional, repeatable; passed through to get_recommendations()
                (None when not supplied).

    Responses (all JSON, always with a "recommendations" list):
        200: {"message", "recommendations", "total"}. "total" is the length of
             the list returned by get_recommendations() for
             top_n = limit + offset, not the size of the whole catalogue.
        400: the title is missing/empty after normalization, or limit/offset
             is not a non-negative integer.
        404: the normalized title is unknown, maps to None, or yields no
             recommendations.
        500: get_recommendations() raised.
    """
    raw_title = request.args.get("title", "").strip()
    title = normalize_title(raw_title)  # Normalize once; reused for lookup and logs.
    content_type = request.args.get("type", None)
    fields = request.args.getlist("fields")

    logger.info("Received API request: %s", request.url)
    logger.info("Raw title received: '%s'", raw_title)
    logger.info("Normalized title used for lookup: '%s'", title)

    if not title:
        return jsonify({"message": "Title required", "recommendations": []}), 400

    # Malformed paging values are a client error: answer 400 rather than
    # letting int() raise and surface as an unhandled 500.
    try:
        limit = _parse_non_negative_int(request.args.get("limit"), 10, "limit")
        offset = _parse_non_negative_int(request.args.get("offset"), 0, "offset")
    except ValueError as exc:
        logger.warning("Rejected request %s: %s", request.url, exc)
        return jsonify({"message": str(exc), "recommendations": []}), 400

    if title not in title_to_index or title_to_index[title] is None:
        logger.error("Title '%s' is missing from title_to_index or maps to None!", title)
        return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

    try:
        # Ask for limit + offset items so the requested page can be sliced out below.
        recs = get_recommendations(
            title, df, title_to_index, cosine_sim_matrix,
            top_n=limit + offset, content_type=content_type, fields=fields or None
        )
        if not recs:
            logger.warning("No recommendations found for '%s'", title)
            return jsonify({"message": f"'{raw_title}' not found", "recommendations": []}), 404

        return jsonify({
            "message": "Similar Movies",
            "recommendations": recs[offset:offset + limit],
            "total": len(recs)
        })
    except Exception as e:
        # logger.exception records the traceback together with the message.
        logger.exception("Error generating recommendations for '%s'", title)
        # NOTE(review): the response echoes the exception text to the client;
        # kept for compatibility with existing consumers, but consider a
        # generic message if internal details must not be exposed.
        return jsonify({"message": f"Server error: {str(e)}", "recommendations": []}), 500


@app.route('/search', methods=['GET'])
def search_titles():
    """Return up to 10 title suggestions that contain the query text.

    Query parameters:
        q: Text to search for. It is trimmed and lowercased, then matched as a
           literal, case-insensitive substring of df['title'].

    Responses:
        200: JSON list of matching titles (an empty list when q is missing or
             blank, or when nothing matches).
        500: JSON empty list if the lookup itself fails.
    """
    query = request.args.get("q", "").strip().lower()
    if not query:
        return jsonify([])

    try:
        # regex=False makes pandas treat the user's text literally. With the
        # default (regex=True), input such as "(" or "[" raises and returns a
        # 500, and characters like "." or "*" match titles the user never typed.
        matches = df['title'].str.lower().str.contains(query, na=False, regex=False)
        suggestions = df.loc[matches, 'title'].head(10).tolist()
        return jsonify(suggestions)
    except Exception:
        # logger.exception records the traceback together with the message.
        logger.exception("Error in search for '%s'", query)
        return jsonify([]), 500

@app.route('/visualizations', methods=['GET'])
def get_visualizations():
    """Return chart-ready JSON for the genre, type and country distributions.

    Each section carries its counts under "data" plus the title and axis
    labels the frontend should display. Any failure is reported as a 500 with
    an explanatory message.
    """
    def chart_section(counts, chart_title, x_label):
        # An empty count series is reported as an empty mapping.
        return {
            "data": {} if counts.empty else counts.to_dict(),
            "title": chart_title,
            "x_label": x_label,
            "y_label": "Count"
        }

    try:
        # 'listed_in' and 'country' hold ', '-separated values: split and
        # explode them so that every individual entry is counted separately.
        top_genres = df['listed_in'].str.split(', ').explode().value_counts().head(10)
        per_type = df['type'].value_counts()
        top_countries = df['country'].str.split(', ').explode().value_counts().head(5)

        payload = {
            "message": "Success",
            "genre_distribution": chart_section(
                top_genres, "Top 10 Most Common Genres", "Genre"
            ),
            "type_distribution": chart_section(
                per_type, "Distribution of Movies vs. TV Shows", "Type"
            ),
            "top_countries": chart_section(
                top_countries, "Top 5 Countries Producing Content", "Country"
            )
        }

        logger.info("Generated visualization data successfully.")
        return jsonify(payload)

    except Exception as e:
        logger.error(f"Visualization error: {str(e)}")
        return jsonify({"message": f"Error generating visualizations: {str(e)}"}), 500

# if __name__ == "__main__":
#     app.run(debug=False, host="0.0.0.0", port=7860)

# Script entry point: hand control to Gunicorn's WSGI command-line runner.
if __name__ == "__main__":
    # Imported here so gunicorn is only needed when this file is run as a script.
    from gunicorn.app.wsgiapp import run
    # NOTE(review): run() is called without arguments; presumably the app target
    # (e.g. "module:app") and bind address come from the command line or a
    # Gunicorn config file — confirm against the deployment command.
    run()