Yoma committed on
Commit
a1a5ffc
·
0 Parent(s):

Initial commit with LFS

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ data/processed/top_similar_items.pkl filter=lfs diff=lfs merge=lfs -text
2
+ home-page-recos.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .env
5
+ .gemini/
6
+ .gemini-*
7
+ backup/
8
+ blogs/
9
+ requirements/
10
+ GEMINI.md
11
+ reproduce_issue.py
12
+
13
+ # Data paths
14
+ data/ml-*/
15
+ data/*.zip
16
+ data/processed/similarity_matrix.pkl
17
+
18
+ # Keep processed artifacts (except the massive raw matrix)
19
+ !data/processed/
20
+ !data/processed/*.json
21
+ !data/processed/top_similar_items.pkl
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI-Powered Explainable Recommendations
2
+
3
+ This project is a high-fidelity demonstration of how Large Language Models (LLMs) can bridge the "explainability gap" in recommendation systems. It transforms opaque collaborative filtering data into transparent, persona-aligned narratives.
4
+
5
+ ![Demo Application](home-page-recos.jpg)
6
+
7
+ ## 🚀 Overview
8
+
9
+ Instead of providing a "black box" list of suggestions, this system uses LLMs to synthesize natural language explanations based on user personas and collaborative filtering logic.
10
+
11
+ ### Key Features
12
+ - **Specialist Centroids**: Personas built by aggregating top "specialists" for specific genres.
13
+ - **Layered Re-ranking**: Incorporates Genre Affinity Boosts and Recency Bias.
14
+ - **Narrative Synthesis**: Uses LiteLLM to generate 3-sentence explanations in "Logic-Driven" or "Social-Behavioral" styles.
15
+
16
+ ## 🛠 Tech Stack
17
+
18
+ - **Interface**: [Gradio](https://gradio.app/)
19
+ - **Recommendation Engine**: Python (`pandas`, `numpy`, `scikit-learn`)
20
+ - **LLM Integration**: [LiteLLM](https://github.com/BerriAI/litellm)
21
+ - **Data Source**: [MovieLens Latest Small](https://grouplens.org/datasets/movielens/latest/)
22
+ - **Visuals**: [TMDB API](https://www.themoviedb.org/documentation/api) for movie posters.
23
+
24
+ ## 📁 Quick Start
25
+
26
+ 1. **Environment Setup**:
27
+ Create a `.env` file:
28
+ ```env
29
+ HF_TOKEN=your_huggingface_token
30
+ TMDB_API_KEY=your_tmdb_api_key
31
+ ```
32
+ 2. **Install Dependencies**:
33
+ ```bash
34
+ pip install -r requirements.txt
35
+ ```
36
+ 3. **Data Preparation**:
37
+ ```bash
38
+ python data_prep.py
39
+ ```
40
+ 4. **Run the Application**:
41
+ ```bash
42
+ python app.py
43
+ ```
44
+
45
+ ## 📜 Acknowledgments & Data Sources
46
+
47
+ - **Dataset**: This project uses the **MovieLens Latest Dataset - Small** provided by [GroupLens Research](https://grouplens.org/datasets/movielens/).
48
+ - **Images**: Movie posters are retrieved using the **TMDB API**. *Note: This product uses the TMDB API but is not endorsed or certified by TMDB.*
app.py ADDED
@@ -0,0 +1,531 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py - OPTIMIZED VERSION with Specialist Centroids & Normalized Ratings
2
+ # Key optimization: Uses pre-computed top_similar_items for O(1) lookups
3
+ # Layer 3 Normalization: Σ(Similarity × Rating) / Σ|Similarity| for proper collaborative filtering
4
+
5
+ import gradio as gr
6
+ import json
7
+ import os
8
+ import time
9
+ import requests
10
+ import litellm
11
+ from config import TMDB_API_KEY, LLM_MODEL_NAME, BASE_PROMPT, LOGIC_INJECTION, SOCIAL_INJECTION
12
+
13
+ # --- Configuration & Constants ---
14
+ PROCESSED_DIR = 'data/processed'
15
+ POSTER_PLACEHOLDER = "https://placehold.co/150x225/000000/FFFFFF?text=Poster"
16
+
17
+ # --- Data Loading & Helpers ---
18
def load_data(filename, is_pickle=False):
    """Load one pre-computed artifact from ``PROCESSED_DIR``.

    Args:
        filename: File name relative to ``PROCESSED_DIR``.
        is_pickle: When true the file is deserialized with ``pickle``;
            otherwise it is parsed as JSON.

    Returns:
        The deserialized Python object.

    Raises:
        FileNotFoundError: If the file does not exist. The message points the
            user at ``data_prep.py``; the original error is kept as the cause.
    """
    import pickle

    path = os.path.join(PROCESSED_DIR, filename)
    try:
        if is_pickle:
            # NOTE(security): unpickling can execute arbitrary code. Only load
            # artifacts produced by this project, never user-supplied files.
            with open(path, 'rb') as f:
                return pickle.load(f)
        # Read JSON as UTF-8 explicitly so non-ASCII titles/hooks do not
        # depend on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Could not find '{filename}'. Please ensure data_prep.py ran successfully."
        ) from err
30
+
31
def get_poster_url(movie_title):
    """Fetch a movie poster URL from the TMDB search API.

    A trailing parenthesised, all-digit suffix such as ``"Gladiator (2000)"``
    is split off the title and sent as the ``year`` search filter.

    Args:
        movie_title: Movie title, optionally ending in ``(YYYY)``.

    Returns:
        The ``w500`` poster URL of the first search hit, or
        ``POSTER_PLACEHOLDER`` when no API key is configured, the request
        fails, or the response contains no usable poster.
    """
    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY":
        return POSTER_PLACEHOLDER

    search_title = movie_title
    year = None
    if '(' in movie_title and ')' in movie_title:
        open_idx = movie_title.rfind('(')
        year_str = movie_title[open_idx + 1:movie_title.rfind(')')]
        if year_str.isdigit():
            year = int(year_str)
            search_title = movie_title[:open_idx].strip()

    search_params = {'api_key': TMDB_API_KEY, 'query': search_title}
    if year:
        search_params['year'] = year

    try:
        response = requests.get("https://api.themoviedb.org/3/search/movie",
                                params=search_params, timeout=3)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        # The exception text can include the request URL, and the API key is
        # sent as a query parameter, so log only the error type and status.
        status = getattr(getattr(e, 'response', None), 'status_code', None)
        print(f"TMDB API call failed: {type(e).__name__} (status={status})")
        return POSTER_PLACEHOLDER
    except ValueError:
        # Body was not valid JSON (older requests versions raise a plain
        # ValueError here rather than a RequestException subclass).
        print("TMDB API call failed: response was not valid JSON")
        return POSTER_PLACEHOLDER

    # Tolerate unexpected payloads (e.g. an error object without 'results').
    results = data.get('results') if isinstance(data, dict) else None
    if results and results[0].get('poster_path'):
        return f"https://image.tmdb.org/t/p/w500{results[0]['poster_path']}"

    return POSTER_PLACEHOLDER
62
+
63
+ # Load data at startup
64
+ print("Loading pre-computed data...")
65
+ persona_archetypes = load_data('persona_archetypes.json')
66
+ home_recommendations = load_data('home_recommendations.json')
67
+ cached_hooks = load_data('cached_hooks.json')
68
+ movie_metadata = load_data('movie_metadata.json')
69
+ top_similar_items = load_data('top_similar_items.pkl', is_pickle=True)
70
+ user_avg_ratings = load_data('user_avg_ratings.json')
71
+ print(f"✅ Loaded pre-computed similarity lookups for {len(top_similar_items)} movies")
72
+
73
+ # --- LLM EXPLANATION FUNCTION ---
74
def call_llm_with_fallback(full_prompt, movie_data, persona_name):
    """Ask the LLM for an explanation, falling back to a template on failure.

    Args:
        full_prompt: Complete prompt sent as a single user message.
        movie_data: Movie metadata dict; ``movie_title`` is used for logging
            and ``genres`` for the fallback text.
        persona_name: Unused here; kept so existing callers keep working.

    Returns:
        A ``(text, is_fallback)`` tuple. ``is_fallback`` is ``True`` when the
        LLM call failed or produced no text and the template sentence is
        returned instead.
    """
    try:
        print(f"DEBUG: Generating explanation for '{movie_data.get('movie_title', 'Unknown')}' using model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": full_prompt}],
            max_tokens=120,
            temperature=0.1,
            stream=False
        )
        print(f"DEBUG: Full LLM Response Object: {response}")
        print(f"DEBUG: Explanation generated in {time.time() - start_ts:.4f}s")
        report = response.choices[0].message.content
        # An empty or missing completion is treated as a failure so the UI
        # shows the template text plus the fallback notice, not a blank panel.
        if not report or not report.strip():
            raise ValueError("LLM returned an empty explanation")
        print("--- LLM Response ---")
        print(report)
        print("--------------------")
        return report, False

    except Exception as e:
        # Deliberately broad: any provider or parsing error must degrade to
        # the template explanation rather than break the detail view.
        error_message = f"LiteLLM API Error: {e}"
        print(f"⚠️ {error_message}")
        gr.Warning(error_message)
        # Fallback logic
        genres = movie_data.get('genres', ['Drama'])
        primary_genre = genres[0] if genres else "Drama"
        fallback_text = f"Recommended because you enjoyed similar {primary_genre} films. Highly rated by users with your preferences."
        return fallback_text, True
103
+
104
def generate_hook_on_the_fly(movie_title):
    """Ask the LLM for a short tagline ("hook") for *movie_title*.

    Returns the model's text with surrounding whitespace and all double
    quotes removed, or a fixed generic hook if the call fails for any reason.
    """
    hook_prompt = f"Generate a 5-10 word snappy, atmospheric hook for the movie: {movie_title}."
    try:
        print(f"DEBUG: Generating hook for '{movie_title}' using model: {LLM_MODEL_NAME}")
        started_at = time.time()
        completion = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": hook_prompt}],
            max_tokens=25,
            temperature=0.7,
            stream=False,
        )
        print(f"DEBUG: Hook generated in {time.time() - started_at:.4f}s")
        raw_text = completion.choices[0].message.content
        # The card template adds its own quotes, so strip the model's.
        return raw_text.strip().replace('"', '')
    except Exception as e:
        print(f"LiteLLM API call for on-the-fly hook failed: {e}")
        return "A truly captivating film."
124
+
125
def generate_explanation(movie_id, persona_name, style):
    """Build the explanation prompt for one movie and send it to the LLM.

    Args:
        movie_id: Movie id (int or str); looked up in ``movie_metadata`` by
            its string form.
        persona_name: Persona name interpolated into the prompt.
        style: ``"Logic-Driven"`` selects ``LOGIC_INJECTION``; any other
            value selects ``SOCIAL_INJECTION``.

    Returns:
        The ``(text, is_fallback)`` tuple from ``call_llm_with_fallback``.
    """
    movie_info = movie_metadata[str(movie_id)]
    if style == "Logic-Driven":
        style_injection = LOGIC_INJECTION
    else:
        style_injection = SOCIAL_INJECTION

    title = movie_info['movie_title']
    genre_list = ', '.join(movie_info['genres'])

    # One consolidated prompt; the text is kept flush-left so no indentation
    # leaks into what is sent to the model.
    full_prompt = f"""You are a digital movie concierge explaining a recommendation to a user.
Your explanation must be exactly 3 sentences and a maximum of 60 words.
Be punchy and engaging. Do not start with 'Based on your history'.
{style_injection}

Explain why the user with {persona_name} persona would like the movie '{title}'
(Genres: {genre_list}). But do not mention the persona directly."""

    return call_llm_with_fallback(full_prompt, movie_info, persona_name)
140
+
141
+ css = """
142
+ body { background-color: #f8fafc; }
143
+ .gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
144
+ .view-container { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: white; box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1); }
145
+
146
+ .movie-card-container {
147
+ background: white;
148
+ border-radius: 1rem;
149
+ overflow: hidden;
150
+ box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
151
+ display: flex !important;
152
+ flex-direction: column !important;
153
+ height: 100%;
154
+ padding: 1rem;
155
+ gap: 1rem;
156
+ }
157
+ .movie-card-content { display: flex; flex-direction: column; flex-grow: 1; background-color: #ffffff;}
158
+ .poster-container { position: relative; aspect-ratio: 2/3; background: #f1f5f9; }
159
+ .poster-img { width: 100%; height: 100%; object-fit: cover; }
160
+ .movie-card-text-content {
161
+ display: flex;
162
+ flex-direction: column;
163
+ flex-grow: 1;
164
+ }
165
+ .movie-title {
166
+ font-weight: 700;
167
+ font-size: 1.125rem;
168
+ color: #1e293b;
169
+ flex-shrink: 0;
170
+ /* FIX: Set fixed height to handle multi-line titles */
171
+ height: 2.5em;
172
+ line-height: 1.25em;
173
+ overflow: hidden;
174
+ }
175
+ .movie-year { font-size: 0.875rem; color: #64748b; margin-top: 2px; flex-shrink: 0; }
176
+ .catchy-phrase {
177
+ font-style: italic;
178
+ color: #4f46e5;
179
+ font-size: 1rem;
180
+ margin: 0.75rem 0;
181
+ /* FIX: Remove flex-grow and display:flex, left-align, and set fixed height */
182
+ text-align: left;
183
+ height: 2.5em; /* Approx 2 lines */
184
+ line-height: 1.2em;
185
+ overflow: hidden;
186
+ }
187
+ .genre-pills-container { display: flex; flex-wrap: wrap; gap: 5px; margin-top: 0.5rem; flex-shrink: 0; }
188
+ .genre-pill { background: #e2e8f0; color: #334155; padding: 4px 8px; border-radius: 9999px; font-size: 0.7rem; font-weight: 500; }
189
+ .synthesis-pill {
190
+ position: absolute; top: 12px; left: 12px;
191
+ background: rgba(30, 41, 59, 0.8);
192
+ color: white; padding: 4px 10px;
193
+ border-radius: 9999px; font-size: 12px; font-weight: 600;
194
+ backdrop-filter: blur(4px);
195
+ border: 1px solid rgba(255, 255, 255, 0.2);
196
+ }
197
+
198
+ .gradio-button {
199
+ background-color: #1e293b !important;
200
+ color: white !important;
201
+ border: none !important;
202
+ border-radius: 0.5rem !important;
203
+ font-weight: 600 !important;
204
+ transition: background-color 0.2s ease-in-out !important;
205
+ padding: 10px !important;
206
+ }
207
+ .gradio-button:hover { background-color: #334155 !important; }
208
+
209
+ #persona_selection_view {
210
+ max-width: 800px;
211
+ margin: 40px auto !important;
212
+ }
213
+
214
+ #persona_selection_view h2 { color: #1e293b !important; }
215
+
216
+ #home_title h2 { color: #1e293b !important; }
217
+
218
+ #detail_view { padding: 20px; border: none; border-radius: 8px; margin-top: 20px; background: white; box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1); }
219
+ #detail_poster img { max-height: 450px; }
220
+ #home_title h1 { color: #1e293b !important; }
221
+ #detail_title h2 { color: #1e293b !important; }
222
+ #detail_explanation p { color: #334155 !important; }
223
+ #detail_explanation { border: 1px solid #e2e8f0; padding: 1rem; border-radius: 0.5rem; }
224
+
225
+ #detail_view label {
226
+ color: white !important;
227
+ font-weight: 600 !important;
228
+ font-size: 1rem !important;
229
+ padding: 0.5rem 0.5rem !important;
230
+ }
231
+
232
+ #detail_view .gr-radio {
233
+ margin: 1rem 0 !important;
234
+ padding: 1.5rem !important;
235
+ }
236
+ """
237
+
238
def generate_card_html(movie_data, latency_s=0.02, is_live=False):
    """Generate the HTML for a single movie card.

    Args:
        movie_data: Dict with required keys ``poster`` and ``title`` and
            optional keys ``genres`` (only the first five are shown) and
            ``hook``.
        latency_s: Seconds shown in the "Synthesis" badge, to two decimals.
        is_live: Accepted for call compatibility; not used by the template.

    Returns:
        An HTML fragment string. All interpolated values are HTML-escaped,
        because titles come from data files and hooks may be LLM output.

    Raises:
        KeyError: If ``poster`` or ``title`` is missing from ``movie_data``.
    """
    # Imported locally: the name `html` is also used for a Gradio component
    # elsewhere in this file, so a module-level `import html` could be shadowed.
    from html import escape

    badge_text = f"⚡️ Synthesis: {latency_s:.2f}s"
    genres_html = "".join(
        f'<span class="genre-pill">{escape(str(g))}</span>'
        for g in movie_data.get('genres', [])[:5]
    )
    poster = escape(str(movie_data['poster']), quote=True)
    title = escape(str(movie_data['title']))
    hook = escape(str(movie_data.get('hook', 'A must-see film.')))
    return f"""
<div class="movie-card-content">
<div class="poster-container">
<img src="{poster}" class="poster-img" />
<div class="synthesis-pill">{badge_text}</div>
</div>
<div class="movie-card-text-content">
<div class="movie-title">{title}</div>
<div class="genre-pills-container">{genres_html}</div>
<div class="catchy-phrase">"{hook}"</div>
</div>
</div>
"""
256
+
257
def update_home_cards_ui(persona_name, recommendations, is_live=False, time_taken=0.02):
    """Build the Gradio updates for the six home-page card slots.

    Each slot contributes three values, in order: the card HTML update, the
    button visibility update, and the movie id (string) for the slot's state.
    Slots beyond ``len(recommendations)`` are hidden and get ``None`` as id.

    Args:
        persona_name: Persona whose cached hooks are used when not live.
        recommendations: List of dicts with at least ``movie_id``; live
            recommendations may also carry their own ``hook``.
        is_live: When true, hooks come from the recommendation dicts rather
            than from ``cached_hooks``.
        time_taken: Latency in seconds shown on each card badge.

    Returns:
        A flat list of 18 values (6 slots x 3 outputs).
    """
    persona_hooks = {} if is_live else cached_hooks.get(persona_name, {})
    ui_updates = []

    for slot in range(6):
        if slot >= len(recommendations):
            # Empty slot: hide card and button, clear the stored movie id.
            ui_updates.extend([gr.update(visible=False), gr.update(visible=False), None])
            continue

        rec = recommendations[slot]
        movie_id = str(rec['movie_id'])
        movie_info = movie_metadata[movie_id]

        if is_live:
            hook_text = rec.get('hook', "A movie worth watching!")
        else:
            hook_text = persona_hooks.get(movie_id, "A great movie awaits!")

        card_data = {
            "title": movie_info['movie_title'],
            "year": movie_info.get('year', ''),
            "genres": movie_info.get('genres', []),
            "poster": get_poster_url(movie_info['movie_title']),
            "hook": hook_text,
        }
        card_html = generate_card_html(card_data, time_taken, is_live)
        ui_updates.extend([
            gr.update(value=card_html, visible=True),
            gr.update(visible=True),  # Show button
            movie_id,
        ])

    return ui_updates
285
+
286
+
287
+
288
+ # --- UI Construction ---
289
+ with gr.Blocks(css=css) as demo:
290
+ selected_persona = gr.State(None)
291
+ selected_movie_id = gr.State(None)
292
+
293
+ gr.Markdown("# 🎬 AI-Powered Explainable Recommendations")
294
+
295
+ with gr.Column(visible=True, elem_id="persona_selection_view", elem_classes="view-container") as persona_view:
296
+ gr.Markdown("## Choose a Persona")
297
+ persona_buttons = [gr.Button(f"View Recommendations for {name}")
298
+ for name in persona_archetypes.keys()]
299
+ gr.Markdown("""
300
+ <div style="margin-top: 1.5rem; padding: 1rem; background-color: #f1f5f9; border-radius: 0.75rem; border: 1px solid #e2e8f0;">
301
+ <p style="color: #475569; font-size: 0.9rem; margin-bottom: 0.5rem; line-height: 1.5;">
302
+ <strong style="color: #1e293b;">Note:</strong> Since this demo doesn't include user accounts, we use <b style="color: #1e293b;">User Personas</b> to showcase personalized recommendations. Each persona represents a unique set of movie preferences.
303
+ </p>
304
+ <a href="https://blogs-amoydutta.hashnode.dev/explainable-recommendations-in-practice-a-demo-driven-approach" target="_blank" style="color: #4f46e5; font-size: 0.9rem; font-weight: 600; text-decoration: none;">
305
+ Learn more about how this demo works in our blog →
306
+ </a>
307
+ </div>
308
+ """)
309
+
310
+ with gr.Column(visible=False, elem_id="home_view",
311
+ elem_classes="view-container") as home_view:
312
+ with gr.Row():
313
+ back_to_persona_button = gr.Button("⬅️ Change Persona")
314
+ home_title = gr.Markdown(elem_id="home_title")
315
+
316
+ all_genres = sorted(list(set(g for m in movie_metadata.values() for g in m['genres'])))
317
+ all_genres.insert(0, "All (Cached Home Page)")
318
+ genre_dropdown = gr.Dropdown(all_genres, label="Select Genre",
319
+ value="All (Cached Home Page)")
320
+
321
+ with gr.Column(elem_id="recommendations_grid") as recommendations_grid:
322
+ home_cards = []
323
+ for i in range(2):
324
+ with gr.Row():
325
+ for j in range(3):
326
+ with gr.Column(elem_classes="movie-card-container"):
327
+ html = gr.HTML()
328
+ btn = gr.Button("⚡ See Full Explanation")
329
+ movie_id_state = gr.State(None)
330
+ home_cards.append({"html": html, "btn": btn, "id": movie_id_state})
331
+
332
+ no_recommendations_message = gr.Markdown("""
333
+ <div style="width: 100%; display: flex; justify-content: center; align-items: center; padding: 4rem 0;">
334
+ <div style="text-align: center;">
335
+ <h3 style="color: #1e293b;">🤷&nbsp; No recommendations found</h3>
336
+ <p style="color: #64748b; font-size: 0.875rem;">Try selecting a different genre for this persona.</p>
337
+ </div>
338
+ </div>
339
+ """, visible=False)
340
+
341
+ with gr.Column(visible=False, elem_id="detail_view",
342
+ elem_classes="view-container") as detail_view:
343
+ back_button = gr.Button("⬅️ Back to Recommendations")
344
+ with gr.Row():
345
+ detail_poster = gr.Image(POSTER_PLACEHOLDER, show_label=False, elem_id="detail_poster")
346
+ with gr.Column():
347
+ detail_title = gr.Markdown(elem_id="detail_title")
348
+ detail_explanation = gr.Markdown(elem_id="detail_explanation", label="The 'Why'")
349
+ narrative_style = gr.Radio(["Logic-Driven", "Social-Behavioral"],
350
+ label="Narrative Style", value="Logic-Driven")
351
+ detail_fallback_info = gr.Markdown(visible=False, elem_id="detail_fallback",
352
+ value="⚠️ LLM explanation failed or is disabled. Showing template-based reason.")
353
+
354
+ # Build output list
355
+ all_card_outputs = [item for card in home_cards for item in (card['html'], card['btn'], card['id'])]
356
+ home_page_outputs = [persona_view, home_view, detail_view, selected_persona, home_title] + all_card_outputs
357
+ detail_outputs = [home_view, detail_view, selected_movie_id, detail_title,
358
+ detail_poster, detail_explanation, detail_fallback_info]
359
+
360
def select_persona_and_render_home(persona_name):
    """Switch to the home view and render the cached cards for a persona.

    Returns a flat list matching ``home_page_outputs``: three view
    visibility updates (persona, home, detail), the selected persona name,
    the page title, then the per-card updates.
    """
    print(f'Loading Home page for persona: {persona_name}')
    t0 = time.time()
    persona_recs = home_recommendations.get(persona_name, [])
    view_updates = [
        gr.update(visible=False),  # persona_view
        gr.update(visible=True),   # home_view
        gr.update(visible=False),  # detail_view
        persona_name,
        f"# {persona_name}'s Recommendations",
    ]
    cards = update_home_cards_ui(persona_name, persona_recs)
    elapsed = time.time() - t0
    print(f"Home page for '{persona_name}' rendered in {elapsed:.4f}s ({elapsed * 1000:.2f}ms)")
    return [*view_updates, *cards]
370
+
371
def filter_by_genre(genre, persona_name):
    """Filter recommendations by genre using Layer 3 normalization.

    Formula: R = Σ(Similarity × Normalized_Rating) / Σ|Similarity|
    - Uses the persona's consolidated history (normalized ratings)
    - Normalizes by total absolute similarity to prevent hub-movie inflation

    Args:
        genre: Selected dropdown value. ``"All (Cached Home Page)"`` re-renders
            the cached home recommendations instead of scoring live.
        persona_name: Currently selected persona (may be ``None``).

    Returns:
        A flat list matching the dropdown's outputs: page title, grid
        visibility update, "no recommendations" visibility update, then 18
        per-card values (6 cards x 3 outputs).
    """
    print(f"Starting genre filter for '{genre}'...")
    start_time = time.time()
    # Handle cached home page separately
    if genre == "All (Cached Home Page)":
        recs = home_recommendations.get(persona_name, [])
        title = f"# {persona_name}'s Recommendations"
        card_updates = update_home_cards_ui(persona_name, recs)
        print(f"Genre filter '{genre}' completed in {time.time() - start_time:.4f}s (cached)")
        return [title, gr.update(visible=True), gr.update(visible=False)] + card_updates

    if not persona_name:
        return ["# Error: No persona selected", gr.update(visible=False), gr.update(visible=False)] + [gr.update(visible=False)] * 18

    archetype = persona_archetypes[persona_name]
    # Normalized ratings; values are not confined to [-2, +2] (the shipped
    # data contains e.g. -3.09), so no range is assumed here.
    history = archetype['consolidated_history']
    candidate_numerators = {}
    candidate_denominators = {}

    for m_id_str, rating in history.items():
        # Only movies the persona clearly liked act as seeds.
        if rating < 0.5:
            continue
        sims = top_similar_items.get(str(m_id_str), {})
        for sim_id, score in sims.items():
            if sim_id in history:
                continue  # already seen by this persona
            meta = movie_metadata.get(sim_id)
            if meta and genre in meta['genres']:
                candidate_numerators[sim_id] = candidate_numerators.get(sim_id, 0) + (score * rating)
                # Σ|Similarity|: use the absolute value so any negative
                # similarity cannot cancel or flip the denominator.
                candidate_denominators[sim_id] = candidate_denominators.get(sim_id, 0) + abs(score)

    candidate_scores = {
        m_id: candidate_numerators[m_id] / candidate_denominators[m_id]
        for m_id in candidate_numerators if candidate_denominators.get(m_id, 0) > 0
    }

    sorted_ids = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)[:6]
    time_taken = time.time() - start_time
    print(f"Genre filter '{genre}' recommendations completed in {time_taken:.4f}s")

    # Generate live recommendations with on-the-fly hooks
    print(f"Starting hook generation for live recommendations for genre {genre}...")
    start_time = time.time()

    live_recs = []
    for m_id, _ in sorted_ids:
        title = movie_metadata[m_id]['movie_title']
        hook = generate_hook_on_the_fly(title)
        live_recs.append({
            "movie_id": int(m_id),
            "movie_title": title,
            "hook": hook
        })
        time.sleep(0.5)  # Gentle pause to prevent HF Backend Error 40001

    # From here on time_taken measures hook generation only; that is the
    # value shown on the card badges.
    time_taken = time.time() - start_time
    print(f"Hook generation for genre {genre} completed in {time_taken:.4f}s")

    title_update = f"# {genre} Recommendations for {persona_name}"

    # Handle no recommendations
    if not live_recs:
        return [title_update, gr.update(visible=False), gr.update(visible=True)] + [gr.update()] * 18

    card_updates = update_home_cards_ui(persona_name, live_recs, is_live=True, time_taken=time_taken)
    return [title_update, gr.update(visible=True), gr.update(visible=False)] + card_updates
442
+
443
def transition_to_detail_view(movie_id):
    """Show the detail view for *movie_id* with a loading spinner.

    Returns a dict keyed by the components in ``detail_outputs``. With a
    falsy ``movie_id`` every component gets a no-op update. The explanation
    panel is filled with spinner markup here; the real text is produced by
    the chained ``generate_final_explanation`` step.
    """
    if not movie_id:
        return {component: gr.update() for component in detail_outputs}

    info = movie_metadata[str(movie_id)]
    movie_title = info['movie_title']

    # Loading spinner HTML
    loading_html = """
<div style="display: flex; align-items: center; justify-content: center; min-height: 200px; width: 100%; flex-direction: column; gap: 1rem; padding: 2rem;">
<div class="loader"></div>
<p style="color: #1e293b; font-size: 1.1rem; font-weight: 600;">Generating personalized explanation...</p>
</div>
<style>
.loader {
border: 4px solid #f3f3f3;
border-top: 4px solid #3b82f6;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
"""

    # Values are listed in the same order as detail_outputs.
    new_values = [
        gr.update(visible=False),       # home_view
        gr.update(visible=True),        # detail_view
        movie_id,                       # selected_movie_id
        f"## {movie_title}",            # detail_title
        get_poster_url(movie_title),    # detail_poster
        loading_html,                   # detail_explanation
        gr.update(visible=False),       # detail_fallback_info
    ]
    return dict(zip(
        [home_view, detail_view, selected_movie_id, detail_title,
         detail_poster, detail_explanation, detail_fallback_info],
        new_values,
    ))
481
+
482
def generate_final_explanation(movie_id, persona_name, style):
    """Produce the explanation text and the fallback-notice visibility.

    Returns ``("", hidden-update)`` when no movie is selected; otherwise the
    explanation text and an update that shows the fallback notice only when
    the template text was used.
    """
    if movie_id:
        text, used_fallback = generate_explanation(movie_id, persona_name, style)
        return text, gr.update(visible=used_fallback)
    return "", gr.update(visible=False)
487
+
488
def back_to_home():
    """Return from the detail view to the recommendations grid."""
    show_home = gr.update(visible=True)
    hide_detail = gr.update(visible=False)
    return {home_view: show_home, detail_view: hide_detail}
490
+
491
def back_to_persona_selection():
    """Return from the home view to the persona selection screen."""
    show_personas = gr.update(visible=True)
    hide_home = gr.update(visible=False)
    return {persona_view: show_personas, home_view: hide_home}
493
+
494
+ # Event wiring: Persona buttons
495
+ for i, button in enumerate(persona_buttons):
496
+ persona_name = list(persona_archetypes.keys())[i]
497
+ button.click(lambda name=persona_name: select_persona_and_render_home(name),
498
+ inputs=[], outputs=home_page_outputs)
499
+
500
+ # Event wiring: Genre filter
501
+ genre_dropdown.change(filter_by_genre,
502
+ inputs=[genre_dropdown, selected_persona],
503
+ outputs=[home_title, recommendations_grid, no_recommendations_message] + all_card_outputs)
504
+
505
+ # Event wiring: "See Full Explanation" buttons
506
+ for card in home_cards:
507
+ card['btn'].click(transition_to_detail_view,
508
+ inputs=[card['id']],
509
+ outputs=detail_outputs
510
+ ).then(generate_final_explanation,
511
+ inputs=[card['id'], selected_persona, narrative_style],
512
+ outputs=[detail_explanation, detail_fallback_info])
513
+
514
+ # Event wiring: Narrative style change
515
def on_style_change(style, movie_id, persona_name):
    """Regenerate the explanation when the narrative style radio changes.

    Takes its arguments in the order the radio event supplies them and
    returns the same ``(text, fallback-visibility update)`` pair as
    ``generate_final_explanation``, to which it delegates.
    """
    return generate_final_explanation(movie_id, persona_name, style)
521
+
522
+ narrative_style.change(on_style_change,
523
+ [narrative_style, selected_movie_id, selected_persona],
524
+ [detail_explanation, detail_fallback_info])
525
+
526
+ # Event wiring: Back buttons
527
+ back_button.click(back_to_home, [], [home_view, detail_view])
528
+ back_to_persona_button.click(back_to_persona_selection, [], [persona_view, home_view])
529
+
530
+ if __name__ == "__main__":
531
+ demo.launch(debug=True)
config.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # config.py
2
+ # Configuration variables: API keys (loaded from .env), the LLM model name, and prompt constants.
3
+
4
+ import os
5
+ from dotenv import load_dotenv
6
+
7
+ # Load environment variables from .env file
8
+ load_dotenv()
9
+
10
+ # HuggingFace API token (loaded from .env file)
11
+ HF_TOKEN = os.getenv("HF_TOKEN")
12
+
13
+ # TMDB API Key (loaded from .env file)
14
+ TMDB_API_KEY = os.getenv("TMDB_API_KEY")
15
+
16
+ # LLM Model Name (as specified on Hugging Face)
17
+ LLM_MODEL_NAME = "huggingface/meta-llama/Llama-3.2-3B-Instruct"
18
+ #LLM_MODEL_NAME = "ollama/llama3.2"
19
+
20
+ # Base prompt for LLM explanations
21
+ BASE_PROMPT = """
22
+ Digital Concierge for MovieLens. Explain the recommendation in exactly 3 sentences.
23
+ Max 60 words. Be punchy. Do not start with 'Based on your history'.
24
+ """
25
+
26
+ # Injections for narrative styles
27
+ LOGIC_INJECTION = "Focus on technical alignment and genre overlap."
28
+ SOCIAL_INJECTION = "Focus on community acclaim and fan-favorite status."
data/processed/cached_hooks.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Action Junkie": {
3
+ "364": "In the shadows of the Pride Lands, a king reigns.",
4
+ "6539": "Thirteen curses, one ship, and a pirate's eternal fate awaits.",
5
+ "8368": "Shadows loom in Hogwarts, a darkness stalks the wizarding world.",
6
+ "5816": "Shadows stir at Hogwarts, terror prevails in secret chambers.",
7
+ "8961": "When superheroes disappear, family must use their greatest powers.",
8
+ "3578": "Felled by empire, reborn by bloodshed in ancient Rome."
9
+ },
10
+ "Romantic Dreamer": {
11
+ "4995": "A brilliant mind shattered by madness, love, and genius.",
12
+ "6942": "Seven interconnected love stories weave a tender, tangled tapestry together.",
13
+ "4306": "In a swampy world, an ogre's grumpy heart beats loud.",
14
+ "899": "Golden Raindrops Fall, Dreams Take Flight in a Sunny Hollywood.",
15
+ "80549": "High school deception sets off a chain reaction of lies.",
16
+ "237": "Love, lost, and longing on the City of Light."
17
+ },
18
+ "Cinephile Critic": {
19
+ "2858": "Beauty hides decay, beneath the banality of suburban fa\u00e7ade.",
20
+ "778": "Addicts, outcasts, and the beauty of Edinburgh's darkest underbelly collide.",
21
+ "47": "Darkness descends, seven sins ignite a blood-soaked Detroit nightmare.",
22
+ "4011": "London's underworld unravels in a bloody, hilarious, and twisted game.",
23
+ "68157": "German cinema is about to meet its bloody demise upstairs.",
24
+ "2502": "Love, Cubicles, and Rebellion in a Stifling Corporate Hellhole."
25
+ }
26
+ }
data/processed/home_recommendations.json ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Action Junkie": [
3
+ {
4
+ "movie_id": 364,
5
+ "movie_title": "Lion King, The (1994)",
6
+ "genres": [
7
+ "Adventure",
8
+ "Animation",
9
+ "Children",
10
+ "Drama",
11
+ "Musical",
12
+ "IMAX"
13
+ ],
14
+ "score": 0.37491641553335764
15
+ },
16
+ {
17
+ "movie_id": 6539,
18
+ "movie_title": "Pirates of the Caribbean: The Curse of the Black Pearl (2003)",
19
+ "genres": [
20
+ "Action",
21
+ "Adventure",
22
+ "Comedy",
23
+ "Fantasy"
24
+ ],
25
+ "score": 0.34449006856982856
26
+ },
27
+ {
28
+ "movie_id": 8368,
29
+ "movie_title": "Harry Potter and the Prisoner of Azkaban (2004)",
30
+ "genres": [
31
+ "Adventure",
32
+ "Fantasy",
33
+ "IMAX"
34
+ ],
35
+ "score": 0.3411372816823042
36
+ },
37
+ {
38
+ "movie_id": 5816,
39
+ "movie_title": "Harry Potter and the Chamber of Secrets (2002)",
40
+ "genres": [
41
+ "Adventure",
42
+ "Fantasy"
43
+ ],
44
+ "score": 0.31476111254911127
45
+ },
46
+ {
47
+ "movie_id": 8961,
48
+ "movie_title": "Incredibles, The (2004)",
49
+ "genres": [
50
+ "Action",
51
+ "Adventure",
52
+ "Animation",
53
+ "Children",
54
+ "Comedy"
55
+ ],
56
+ "score": 0.31073810656518414
57
+ },
58
+ {
59
+ "movie_id": 3578,
60
+ "movie_title": "Gladiator (2000)",
61
+ "genres": [
62
+ "Action",
63
+ "Adventure",
64
+ "Drama"
65
+ ],
66
+ "score": 0.26787650506146904
67
+ }
68
+ ],
69
+ "Romantic Dreamer": [
70
+ {
71
+ "movie_id": 4995,
72
+ "movie_title": "Beautiful Mind, A (2001)",
73
+ "genres": [
74
+ "Drama",
75
+ "Romance"
76
+ ],
77
+ "score": 0.1957933962287504
78
+ },
79
+ {
80
+ "movie_id": 6942,
81
+ "movie_title": "Love Actually (2003)",
82
+ "genres": [
83
+ "Comedy",
84
+ "Drama",
85
+ "Romance"
86
+ ],
87
+ "score": 0.18297972675304644
88
+ },
89
+ {
90
+ "movie_id": 4306,
91
+ "movie_title": "Shrek (2001)",
92
+ "genres": [
93
+ "Adventure",
94
+ "Animation",
95
+ "Children",
96
+ "Comedy",
97
+ "Fantasy",
98
+ "Romance"
99
+ ],
100
+ "score": 0.12709570186798258
101
+ },
102
+ {
103
+ "movie_id": 899,
104
+ "movie_title": "Singin' in the Rain (1952)",
105
+ "genres": [
106
+ "Comedy",
107
+ "Musical",
108
+ "Romance"
109
+ ],
110
+ "score": 0.11874943471592653
111
+ },
112
+ {
113
+ "movie_id": 80549,
114
+ "movie_title": "Easy A (2010)",
115
+ "genres": [
116
+ "Comedy",
117
+ "Romance"
118
+ ],
119
+ "score": 0.09955455760645583
120
+ },
121
+ {
122
+ "movie_id": 237,
123
+ "movie_title": "Forget Paris (1995)",
124
+ "genres": [
125
+ "Comedy",
126
+ "Romance"
127
+ ],
128
+ "score": 0.09745981035143099
129
+ }
130
+ ],
131
+ "Cinephile Critic": [
132
+ {
133
+ "movie_id": 2858,
134
+ "movie_title": "American Beauty (1999)",
135
+ "genres": [
136
+ "Drama",
137
+ "Romance"
138
+ ],
139
+ "score": 0.5709862785097057
140
+ },
141
+ {
142
+ "movie_id": 778,
143
+ "movie_title": "Trainspotting (1996)",
144
+ "genres": [
145
+ "Comedy",
146
+ "Crime",
147
+ "Drama"
148
+ ],
149
+ "score": 0.5420799528869238
150
+ },
151
+ {
152
+ "movie_id": 47,
153
+ "movie_title": "Seven (a.k.a. Se7en) (1995)",
154
+ "genres": [
155
+ "Mystery",
156
+ "Thriller"
157
+ ],
158
+ "score": 0.5304661006687574
159
+ },
160
+ {
161
+ "movie_id": 4011,
162
+ "movie_title": "Snatch (2000)",
163
+ "genres": [
164
+ "Comedy",
165
+ "Crime",
166
+ "Thriller"
167
+ ],
168
+ "score": 0.44253226900550885
169
+ },
170
+ {
171
+ "movie_id": 68157,
172
+ "movie_title": "Inglourious Basterds (2009)",
173
+ "genres": [
174
+ "Action",
175
+ "Drama",
176
+ "War"
177
+ ],
178
+ "score": 0.4239410030372689
179
+ },
180
+ {
181
+ "movie_id": 2502,
182
+ "movie_title": "Office Space (1999)",
183
+ "genres": [
184
+ "Comedy",
185
+ "Crime"
186
+ ],
187
+ "score": 0.4099722885772744
188
+ }
189
+ ]
190
+ }
data/processed/movie_metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/persona_archetypes.json ADDED
@@ -0,0 +1,714 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Action Junkie": {
3
+ "specialist_ids": [
4
+ 3,
5
+ 529,
6
+ 388,
7
+ 112,
8
+ 344
9
+ ],
10
+ "target_genres": [
11
+ "Action",
12
+ "Adventure",
13
+ "Sci-Fi"
14
+ ],
15
+ "consolidated_history": {
16
+ "1": -0.3961538461538463,
17
+ "2": -2.0923076923076924,
18
+ "6": 0.9076923076923076,
19
+ "7": -1.2000000000000002,
20
+ "16": 0.9076923076923076,
21
+ "17": -3.0923076923076924,
22
+ "19": 0.4076923076923076,
23
+ "21": -1.5923076923076924,
24
+ "25": -1.0923076923076924,
25
+ "31": -1.9358974358974357,
26
+ "32": 1.6038461538461537,
27
+ "36": -3.0923076923076924,
28
+ "39": -3.0923076923076924,
29
+ "47": 1.4076923076923076,
30
+ "50": 1.4076923076923076,
31
+ "62": 0.1038461538461537,
32
+ "65": -2.2,
33
+ "95": -0.6461538461538463,
34
+ "104": 0.1038461538461537,
35
+ "110": 0.3197881828316611,
36
+ "135": 0.7999999999999998,
37
+ "141": 0.1038461538461537,
38
+ "150": 0.4076923076923076,
39
+ "161": -2.0923076923076924,
40
+ "185": -1.5923076923076924,
41
+ "208": -1.0923076923076924,
42
+ "223": -2.0923076923076924,
43
+ "253": 0.9076923076923076,
44
+ "260": 1.4798587885544408,
45
+ "288": 0.4076923076923076,
46
+ "292": 0.4076923076923076,
47
+ "300": -3.0923076923076924,
48
+ "316": -0.26811594202898537,
49
+ "318": 1.2318840579710146,
50
+ "329": -1.0923076923076924,
51
+ "339": -2.0923076923076924,
52
+ "356": 0.5697881828316611,
53
+ "357": -3.0923076923076924,
54
+ "380": 1.2318840579710146,
55
+ "434": -3.0923076923076924,
56
+ "454": 0.4076923076923076,
57
+ "480": 1.4076923076923076,
58
+ "508": 0.9076923076923076,
59
+ "527": -1.9358974358974357,
60
+ "551": -0.09230769230769242,
61
+ "575": -1.6206896551724137,
62
+ "586": 1.2318840579710146,
63
+ "593": 0.4076923076923076,
64
+ "631": -2.6206896551724137,
65
+ "637": -0.20000000000000018,
66
+ "647": -1.9358974358974357,
67
+ "648": -0.20000000000000018,
68
+ "653": 0.7999999999999998,
69
+ "661": -0.20000000000000018,
70
+ "688": -1.9358974358974357,
71
+ "711": -0.20000000000000018,
72
+ "719": -1.2000000000000002,
73
+ "720": -1.9358974358974357,
74
+ "733": 1.7999999999999998,
75
+ "736": 0.7659420289855072,
76
+ "743": -0.20000000000000018,
77
+ "745": -1.1206896551724137,
78
+ "761": -2.2,
79
+ "778": -0.09230769230769242,
80
+ "780": 1.1038461538461537,
81
+ "784": -2.2,
82
+ "786": 1.7999999999999998,
83
+ "788": -1.1461538461538463,
84
+ "849": 2.5641025641025643,
85
+ "914": -1.9358974358974357,
86
+ "1073": -0.20000000000000018,
87
+ "1093": -1.9358974358974357,
88
+ "1124": -1.9358974358974357,
89
+ "1196": 1.4076923076923076,
90
+ "1197": 0.3793103448275863,
91
+ "1198": 1.4076923076923076,
92
+ "1203": -2.6206896551724137,
93
+ "1210": 1.319788182831661,
94
+ "1214": -2.7681159420289854,
95
+ "1263": -1.9358974358974357,
96
+ "1265": 1.2318840579710146,
97
+ "1270": 0.8197881828316611,
98
+ "1272": -1.9358974358974357,
99
+ "1275": 1.0641025641025643,
100
+ "1302": -1.9358974358974357,
101
+ "1356": 0.4076923076923076,
102
+ "1371": 0.5641025641025643,
103
+ "1393": 0.4076923076923076,
104
+ "1587": 2.0641025641025643,
105
+ "1703": 0.3793103448275863,
106
+ "1917": 1.4076923076923076,
107
+ "2012": 1.2318840579710146,
108
+ "2018": -1.9358974358974357,
109
+ "2028": 0.8197881828316611,
110
+ "2080": -1.9358974358974357,
111
+ "2090": -1.9358974358974357,
112
+ "2105": -0.4358974358974357,
113
+ "2288": 1.5641025641025643,
114
+ "2346": -1.6206896551724137,
115
+ "2353": 1.2318840579710146,
116
+ "2424": -1.9358974358974357,
117
+ "2541": -1.7681159420289854,
118
+ "2571": 1.319788182831661,
119
+ "2617": -1.7681159420289854,
120
+ "2628": 1.2318840579710146,
121
+ "2701": 0.7318840579710146,
122
+ "2712": 0.23188405797101463,
123
+ "2762": -0.7681159420289854,
124
+ "2851": 2.5641025641025643,
125
+ "2858": 1.2318840579710146,
126
+ "2916": 1.2318840579710146,
127
+ "3000": 0.7318840579710146,
128
+ "3024": 2.0641025641025643,
129
+ "3082": 0.23188405797101463,
130
+ "3147": 1.319788182831661,
131
+ "3160": -0.7681159420289854,
132
+ "3210": -1.9358974358974357,
133
+ "3703": 2.5641025641025643,
134
+ "3717": 0.4076923076923076,
135
+ "3793": 1.2318840579710146,
136
+ "3897": -0.7681159420289854,
137
+ "3948": -1.2681159420289854,
138
+ "3949": -2.6020066889632103,
139
+ "3996": -0.7681159420289854,
140
+ "4011": 0.23188405797101463,
141
+ "4014": -1.2681159420289854,
142
+ "4022": -0.7681159420289854,
143
+ "4034": -1.7681159420289854,
144
+ "4039": -0.6206896551724137,
145
+ "4226": -2.2681159420289854,
146
+ "4306": 1.3055972013993005,
147
+ "4518": 2.5641025641025643,
148
+ "4720": -2.7681159420289854,
149
+ "4848": -2.2681159420289854,
150
+ "4878": -1.7681159420289854,
151
+ "4973": 0.23188405797101463,
152
+ "4993": 1.4076923076923076,
153
+ "5010": -1.7681159420289854,
154
+ "5048": -1.9358974358974357,
155
+ "5181": 2.5641025641025643,
156
+ "5218": 0.8793103448275863,
157
+ "5299": -2.7681159420289854,
158
+ "5418": 0.23188405797101463,
159
+ "5459": 0.7318840579710146,
160
+ "5502": -2.7681159420289854,
161
+ "5618": 0.7318840579710146,
162
+ "5746": 2.5641025641025643,
163
+ "5764": 2.0641025641025643,
164
+ "5919": 2.5641025641025643,
165
+ "5952": 1.4076923076923076,
166
+ "5989": 0.23188405797101463,
167
+ "6158": -0.6206896551724137,
168
+ "6238": -1.9358974358974357,
169
+ "6377": 0.3793103448275863,
170
+ "6550": -0.1206896551724137,
171
+ "6662": -0.1206896551724137,
172
+ "6835": 2.5641025641025643,
173
+ "6951": -1.1206896551724137,
174
+ "7099": 0.7318840579710146,
175
+ "7153": 1.4076923076923076,
176
+ "7247": -0.6206896551724137,
177
+ "7899": 2.0641025641025643,
178
+ "7991": 2.5641025641025643,
179
+ "8360": 1.3055972013993005,
180
+ "8869": -1.1206896551724137,
181
+ "26409": 2.0641025641025643,
182
+ "33615": 1.3793103448275863,
183
+ "34150": 0.23188405797101463,
184
+ "43836": 0.3793103448275863,
185
+ "44022": 0.8793103448275863,
186
+ "53121": 1.3793103448275863,
187
+ "58559": 1.4076923076923076,
188
+ "59315": 0.7318840579710146,
189
+ "60069": -0.26811594202898537,
190
+ "60684": 1.2318840579710146,
191
+ "62999": 1.3793103448275863,
192
+ "63239": -0.6206896551724137,
193
+ "68157": 0.9076923076923076,
194
+ "68954": 0.8197881828316611,
195
+ "70286": -0.09230769230769242,
196
+ "70946": 2.5641025641025643,
197
+ "72378": -1.9358974358974357,
198
+ "72998": 0.4076923076923076,
199
+ "78637": 1.3793103448275863,
200
+ "79132": 0.9076923076923076,
201
+ "81847": 0.23188405797101463,
202
+ "84152": 1.2318840579710146,
203
+ "86298": 0.3793103448275863,
204
+ "86332": 0.23188405797101463,
205
+ "89745": 0.7318840579710146,
206
+ "91529": 1.4076923076923076,
207
+ "91542": 0.23188405797101463,
208
+ "95105": 1.3793103448275863,
209
+ "99114": 0.4076923076923076,
210
+ "103335": 1.3793103448275863,
211
+ "104245": -0.1206896551724137,
212
+ "108190": -0.7681159420289854,
213
+ "109487": 1.4076923076923076,
214
+ "111362": 1.2318840579710146,
215
+ "111443": -0.7681159420289854,
216
+ "111659": -0.7681159420289854,
217
+ "111759": 0.23188405797101463,
218
+ "112852": 1.2318840579710146,
219
+ "114180": -0.7681159420289854,
220
+ "115617": -0.09230769230769242
221
+ }
222
+ },
223
+ "Romantic Dreamer": {
224
+ "specialist_ids": [
225
+ 358,
226
+ 461,
227
+ 175,
228
+ 594,
229
+ 578
230
+ ],
231
+ "target_genres": [
232
+ "Romance"
233
+ ],
234
+ "consolidated_history": {
235
+ "2": 0.07543103448275845,
236
+ "3": 0.07543103448275845,
237
+ "11": 1.0754310344827585,
238
+ "17": 0.07543103448275845,
239
+ "28": 0.5754310344827585,
240
+ "39": 0.5754310344827585,
241
+ "46": 0.5754310344827585,
242
+ "48": 0.5754310344827585,
243
+ "70": -0.42456896551724155,
244
+ "110": 1.0754310344827585,
245
+ "140": 0.5754310344827585,
246
+ "145": 0.07543103448275845,
247
+ "150": -0.42456896551724155,
248
+ "158": 0.5754310344827585,
249
+ "160": 1.0754310344827585,
250
+ "165": 0.5754310344827585,
251
+ "186": 0.3154932950191569,
252
+ "196": 0.07543103448275845,
253
+ "204": 0.5754310344827585,
254
+ "208": 1.0754310344827585,
255
+ "223": 0.07543103448275845,
256
+ "252": 1.0754310344827585,
257
+ "253": -3.4245689655172415,
258
+ "260": -2.4444444444444446,
259
+ "270": 0.5754310344827585,
260
+ "273": -0.9245689655172415,
261
+ "284": 0.5754310344827585,
262
+ "289": 0.5754310344827585,
263
+ "315": 0.5754310344827585,
264
+ "316": 0.07543103448275845,
265
+ "317": 0.07543103448275845,
266
+ "318": 1.5555555555555554,
267
+ "329": 0.07543103448275845,
268
+ "339": 1.0754310344827585,
269
+ "356": 2.0555555555555554,
270
+ "364": 1.0754310344827585,
271
+ "367": 0.07543103448275845,
272
+ "368": 0.07543103448275845,
273
+ "377": 0.5754310344827585,
274
+ "380": 0.5754310344827585,
275
+ "393": 0.5754310344827585,
276
+ "442": 1.0754310344827585,
277
+ "457": -0.9345067049808431,
278
+ "480": -0.6845067049808431,
279
+ "494": 1.0754310344827585,
280
+ "497": 0.07543103448275845,
281
+ "509": 0.07543103448275845,
282
+ "527": 0.5555555555555554,
283
+ "531": -0.04166666666666652,
284
+ "539": 1.065493295019157,
285
+ "552": 1.0754310344827585,
286
+ "586": 1.0754310344827585,
287
+ "587": 1.065493295019157,
288
+ "588": 0.5754310344827585,
289
+ "589": 1.0754310344827585,
290
+ "592": 0.5754310344827585,
291
+ "593": 1.065493295019157,
292
+ "594": 0.07543103448275845,
293
+ "595": 1.0754310344827585,
294
+ "597": 1.0754310344827585,
295
+ "605": -0.25609756097560954,
296
+ "613": -0.9629629629629628,
297
+ "648": 1.0754310344827585,
298
+ "653": 1.0754310344827585,
299
+ "671": -3.0416666666666665,
300
+ "736": 0.07543103448275845,
301
+ "762": 0.07543103448275845,
302
+ "780": 0.5754310344827585,
303
+ "786": 1.0754310344827585,
304
+ "799": -3.4245689655172415,
305
+ "830": 0.0370370370370372,
306
+ "839": 1.0754310344827585,
307
+ "858": 0.07543103448275845,
308
+ "902": 0.7439024390243905,
309
+ "919": 0.07543103448275845,
310
+ "920": 0.5754310344827585,
311
+ "990": 0.5754310344827585,
312
+ "1036": -0.6845067049808431,
313
+ "1088": 0.5754310344827585,
314
+ "1188": 0.5754310344827585,
315
+ "1208": -0.9245689655172415,
316
+ "1210": 0.07543103448275845,
317
+ "1213": 1.0555555555555554,
318
+ "1214": 0.07543103448275845,
319
+ "1220": 0.07543103448275845,
320
+ "1225": 0.07543103448275845,
321
+ "1240": 1.0754310344827585,
322
+ "1246": 1.5555555555555554,
323
+ "1259": 1.0555555555555554,
324
+ "1307": 1.0754310344827585,
325
+ "1320": -0.42456896551724155,
326
+ "1370": 0.5754310344827585,
327
+ "1393": 1.0754310344827585,
328
+ "1407": -3.4245689655172415,
329
+ "1429": 0.07543103448275845,
330
+ "1457": 0.5754310344827585,
331
+ "1497": 1.0754310344827585,
332
+ "1518": -0.42456896551724155,
333
+ "1527": -0.9245689655172415,
334
+ "1569": 0.5754310344827585,
335
+ "1580": 0.5754310344827585,
336
+ "1605": 0.07543103448275845,
337
+ "1608": 0.5754310344827585,
338
+ "1616": 0.5754310344827585,
339
+ "1626": 0.07543103448275845,
340
+ "1652": 0.5754310344827585,
341
+ "1676": 0.5754310344827585,
342
+ "1687": 0.07543103448275845,
343
+ "1704": 1.0555555555555554,
344
+ "1707": 0.07543103448275845,
345
+ "1721": 1.1596667367535745,
346
+ "1733": 1.0754310344827585,
347
+ "1784": 0.3154932950191569,
348
+ "1835": 1.0754310344827585,
349
+ "1876": 0.5754310344827585,
350
+ "1917": 0.5754310344827585,
351
+ "1923": 0.07543103448275845,
352
+ "1961": 0.5555555555555554,
353
+ "1974": -0.42456896551724155,
354
+ "2000": 0.5754310344827585,
355
+ "2001": 0.5754310344827585,
356
+ "2002": 1.0754310344827585,
357
+ "2028": 0.07543103448275845,
358
+ "2058": 0.5754310344827585,
359
+ "2059": 0.07543103448275845,
360
+ "2125": 0.5754310344827585,
361
+ "2144": -0.25609756097560954,
362
+ "2175": 0.5754310344827585,
363
+ "2196": 1.0754310344827585,
364
+ "2226": -2.2560975609756095,
365
+ "2268": 0.07543103448275845,
366
+ "2269": 1.0754310344827585,
367
+ "2300": 0.4583333333333335,
368
+ "2338": -2.2560975609756095,
369
+ "2340": 0.07543103448275845,
370
+ "2395": 0.07543103448275845,
371
+ "2396": 0.40966673675357446,
372
+ "2424": 1.0754310344827585,
373
+ "2432": 0.5754310344827585,
374
+ "2488": -3.4245689655172415,
375
+ "2491": 0.5754310344827585,
376
+ "2496": 0.5370370370370372,
377
+ "2526": -3.0416666666666665,
378
+ "2571": -0.6845067049808431,
379
+ "2572": 0.7439024390243905,
380
+ "2589": -0.42456896551724155,
381
+ "2617": -0.42456896551724155,
382
+ "2628": 0.07543103448275845,
383
+ "2640": 0.07543103448275845,
384
+ "2671": 1.0754310344827585,
385
+ "2706": -0.42456896551724155,
386
+ "2716": 0.07543103448275845,
387
+ "2717": 0.07543103448275845,
388
+ "2722": 0.07543103448275845,
389
+ "2762": 0.5654932950191569,
390
+ "2807": 1.0754310344827585,
391
+ "2808": 1.0754310344827585,
392
+ "2906": 1.0754310344827585,
393
+ "2985": 0.5754310344827585,
394
+ "2986": 0.5754310344827585,
395
+ "3004": 0.5754310344827585,
396
+ "3081": 0.7439024390243905,
397
+ "3147": 1.0555555555555554,
398
+ "3155": 0.5370370370370372,
399
+ "3257": 1.0754310344827585,
400
+ "3269": 0.5754310344827585,
401
+ "3353": 0.5754310344827585,
402
+ "3578": -0.9345067049808431,
403
+ "3594": 1.4583333333333335,
404
+ "3616": 0.5754310344827585,
405
+ "3791": 1.4583333333333335,
406
+ "3793": 0.5754310344827585,
407
+ "3824": 0.5754310344827585,
408
+ "3882": -0.25609756097560954,
409
+ "3948": 0.5754310344827585,
410
+ "3969": 0.9583333333333335,
411
+ "4025": 1.0754310344827585,
412
+ "4054": 0.5754310344827585,
413
+ "4069": 1.0754310344827585,
414
+ "4155": 1.0754310344827585,
415
+ "4200": 0.5754310344827585,
416
+ "4270": 0.5754310344827585,
417
+ "4308": 0.7439024390243905,
418
+ "4374": 0.5754310344827585,
419
+ "4517": -3.4245689655172415,
420
+ "4715": -0.9245689655172415,
421
+ "4748": 0.5754310344827585,
422
+ "4794": -3.4245689655172415,
423
+ "4823": 1.0754310344827585,
424
+ "4827": -0.9245689655172415,
425
+ "4902": -3.4245689655172415,
426
+ "4991": 0.07543103448275845,
427
+ "5066": 1.4096667367535745,
428
+ "5092": -0.4629629629629628,
429
+ "5105": -3.4245689655172415,
430
+ "5151": 0.5754310344827585,
431
+ "5299": -0.42456896551724155,
432
+ "5308": 1.0754310344827585,
433
+ "5309": 0.5370370370370372,
434
+ "5364": 0.5754310344827585,
435
+ "5409": 0.07543103448275845,
436
+ "5410": -3.0416666666666665,
437
+ "5527": 0.5754310344827585,
438
+ "5620": 0.5754310344827585,
439
+ "5657": 0.5754310344827585,
440
+ "5909": -3.4245689655172415,
441
+ "5943": 0.5754310344827585,
442
+ "5957": 1.0754310344827585,
443
+ "6006": 1.0754310344827585,
444
+ "6140": -3.4245689655172415,
445
+ "6148": -1.9245689655172415,
446
+ "6155": 1.0754310344827585,
447
+ "6287": 0.07543103448275845,
448
+ "6333": 0.5754310344827585,
449
+ "6373": 1.0754310344827585,
450
+ "6417": 0.07543103448275845,
451
+ "6549": 1.0754310344827585,
452
+ "6577": 0.07543103448275845,
453
+ "6639": -3.4245689655172415,
454
+ "6659": -1.9245689655172415,
455
+ "6665": 0.5754310344827585,
456
+ "6723": 0.07543103448275845,
457
+ "6751": -1.4629629629629628,
458
+ "6754": 1.0754310344827585,
459
+ "6765": 1.0754310344827585,
460
+ "6793": 0.07543103448275845,
461
+ "6967": -3.4245689655172415,
462
+ "6975": -3.4245689655172415,
463
+ "6994": 1.0754310344827585,
464
+ "6996": 0.07543103448275845,
465
+ "7017": 1.0754310344827585,
466
+ "7023": 0.5754310344827585,
467
+ "7031": 0.07543103448275845,
468
+ "7032": 0.07543103448275845,
469
+ "7046": -2.943765964240102,
470
+ "7101": -0.42456896551724155,
471
+ "7114": -3.4245689655172415,
472
+ "7115": -3.4245689655172415,
473
+ "7116": -3.4245689655172415,
474
+ "7149": 0.5754310344827585,
475
+ "7169": 1.266882183908046,
476
+ "7254": 0.24390243902439046,
477
+ "7255": 0.7439024390243905,
478
+ "7259": -0.04166666666666652,
479
+ "7293": 0.7439024390243905,
480
+ "7320": 0.5754310344827585,
481
+ "7323": 0.0370370370370372,
482
+ "7345": 0.07543103448275845,
483
+ "7361": 0.7439024390243905,
484
+ "7373": -3.4245689655172415,
485
+ "7395": 0.0370370370370372,
486
+ "7450": 0.5754310344827585,
487
+ "7564": -3.4245689655172415,
488
+ "7650": -3.4245689655172415,
489
+ "7742": -3.4245689655172415,
490
+ "7782": 0.5754310344827585,
491
+ "7883": -3.4245689655172415,
492
+ "7982": -3.4245689655172415,
493
+ "8035": -3.4245689655172415,
494
+ "8118": 0.07543103448275845,
495
+ "8132": 0.5754310344827585,
496
+ "8268": 0.5754310344827585,
497
+ "8362": -0.04166666666666652,
498
+ "8482": -3.4245689655172415,
499
+ "8521": -3.4245689655172415,
500
+ "8533": 0.785456836848062,
501
+ "8620": -3.4245689655172415,
502
+ "8636": 1.0754310344827585,
503
+ "8643": 1.016882183908046,
504
+ "8727": -1.9245689655172415,
505
+ "8778": 0.07543103448275845,
506
+ "8866": 0.5754310344827585,
507
+ "8869": 1.0754310344827585,
508
+ "8911": 1.4583333333333335,
509
+ "26095": -2.7560975609756095,
510
+ "31433": 0.5754310344827585,
511
+ "32289": 1.4583333333333335,
512
+ "33145": -0.04166666666666652,
513
+ "33679": -1.2560975609756095,
514
+ "33880": -0.04166666666666652,
515
+ "36525": 0.7439024390243905,
516
+ "38388": 1.4583333333333335,
517
+ "39427": -3.0416666666666665,
518
+ "42728": 1.7439024390243905,
519
+ "43904": -3.0416666666666665,
520
+ "44613": 1.4583333333333335,
521
+ "45668": 1.0370370370370372,
522
+ "45720": 0.5370370370370372,
523
+ "45880": 0.4583333333333335,
524
+ "47382": 1.4583333333333335,
525
+ "47610": 1.2439024390243905,
526
+ "48412": 0.24390243902439046,
527
+ "49130": 0.7439024390243905,
528
+ "49772": 1.0370370370370372,
529
+ "52328": 0.7439024390243905,
530
+ "52668": 0.0370370370370372,
531
+ "55052": 1.7439024390243905,
532
+ "56389": 0.0370370370370372,
533
+ "56715": 0.7439024390243905,
534
+ "56949": 0.4583333333333335,
535
+ "58107": 0.4583333333333335,
536
+ "58154": 0.5370370370370372,
537
+ "58998": -2.2560975609756095,
538
+ "60069": 0.7439024390243905,
539
+ "65350": -2.7560975609756095,
540
+ "66203": 0.7439024390243905,
541
+ "68157": -2.4444444444444446,
542
+ "68269": 0.5370370370370372,
543
+ "69757": 1.2439024390243905,
544
+ "69844": 0.24390243902439046,
545
+ "70599": 1.2439024390243905,
546
+ "70687": -2.2560975609756095,
547
+ "71579": 0.7439024390243905,
548
+ "71823": 0.5370370370370372,
549
+ "72407": -0.4629629629629628,
550
+ "73017": 1.0370370370370372,
551
+ "74458": 1.0555555555555554,
552
+ "75446": -1.4629629629629628,
553
+ "76251": 0.5370370370370372,
554
+ "78316": 0.0370370370370372,
555
+ "79879": -3.462962962962963,
556
+ "81562": 1.0370370370370372,
557
+ "81591": -1.2560975609756095,
558
+ "81845": 1.0370370370370372,
559
+ "85334": -2.7560975609756095,
560
+ "87028": -1.2560975609756095,
561
+ "88163": 0.7439024390243905,
562
+ "89386": -2.7560975609756095,
563
+ "91500": -2.4444444444444446,
564
+ "99114": 1.5555555555555554,
565
+ "104841": -2.4444444444444446,
566
+ "106782": 2.0555555555555554
567
+ }
568
+ },
569
+ "Cinephile Critic": {
570
+ "specialist_ids": [
571
+ 338,
572
+ 549,
573
+ 296,
574
+ 473,
575
+ 394
576
+ ],
577
+ "target_genres": [
578
+ "Drama",
579
+ "Crime",
580
+ "Mystery",
581
+ "Thriller"
582
+ ],
583
+ "consolidated_history": {
584
+ "34": 0.04347826086956541,
585
+ "50": 1.4803047194351542,
586
+ "60": -1.4189189189189189,
587
+ "110": 0.7367149758454107,
588
+ "150": 0.04347826086956541,
589
+ "153": 0.04347826086956541,
590
+ "185": 0.04347826086956541,
591
+ "208": -1.9565217391304346,
592
+ "225": 1.0434782608695654,
593
+ "260": -1.6666666666666665,
594
+ "262": 0.08108108108108114,
595
+ "293": 0.9572072072072073,
596
+ "296": 0.9487179487179487,
597
+ "316": -0.9565217391304346,
598
+ "318": 1.202962577962578,
599
+ "339": 0.04347826086956541,
600
+ "344": -0.9565217391304346,
601
+ "356": 0.9384057971014492,
602
+ "367": 0.04347826086956541,
603
+ "380": 0.04347826086956541,
604
+ "410": -0.9565217391304346,
605
+ "454": -0.9565217391304346,
606
+ "457": 1.0434782608695654,
607
+ "480": 0.04347826086956541,
608
+ "527": -0.08974358974358972,
609
+ "553": 1.0434782608695654,
610
+ "588": -0.9565217391304346,
611
+ "589": -0.6666666666666665,
612
+ "590": -0.9565217391304346,
613
+ "592": 0.04347826086956541,
614
+ "593": 1.0641025641025643,
615
+ "595": -0.9565217391304346,
616
+ "858": 1.5641025641025643,
617
+ "1036": 0.3333333333333335,
618
+ "1196": -1.6666666666666665,
619
+ "1198": 0.8333333333333335,
620
+ "1201": 0.833333333333333,
621
+ "1203": 2.0641025641025643,
622
+ "1210": -1.6666666666666665,
623
+ "1221": 2.0641025641025643,
624
+ "1291": -0.6666666666666665,
625
+ "1704": 0.833333333333333,
626
+ "1760": -2.918918918918919,
627
+ "1895": -1.4189189189189189,
628
+ "2028": 0.8333333333333333,
629
+ "2136": -2.418918918918919,
630
+ "2253": -0.9189189189189189,
631
+ "2297": -0.41891891891891886,
632
+ "2300": -0.9189189189189189,
633
+ "2324": 0.833333333333333,
634
+ "2329": 1.0810810810810811,
635
+ "2485": -0.9189189189189189,
636
+ "2571": 0.8333333333333333,
637
+ "2581": -0.41891891891891886,
638
+ "2596": 0.5810810810810811,
639
+ "2628": -3.166666666666667,
640
+ "2959": 1.077962577962578,
641
+ "3264": -1.4189189189189189,
642
+ "3897": 1.5810810810810811,
643
+ "3988": -1.4189189189189189,
644
+ "4144": 0.833333333333333,
645
+ "4226": 1.3333333333333335,
646
+ "4873": 0.5810810810810811,
647
+ "4878": 0.5810810810810811,
648
+ "4880": 0.08108108108108114,
649
+ "4975": 0.5810810810810811,
650
+ "4979": 1.0810810810810811,
651
+ "5349": 0.06410256410256432,
652
+ "5378": -3.166666666666667,
653
+ "5445": -0.9358974358974357,
654
+ "5618": 0.33333333333333304,
655
+ "5785": 0.08108108108108114,
656
+ "5952": -0.0512820512820511,
657
+ "6281": -1.4189189189189189,
658
+ "6874": 1.5641025641025643,
659
+ "7147": 0.5810810810810811,
660
+ "7153": -0.4358974358974357,
661
+ "7156": 0.33333333333333304,
662
+ "7361": 1.5810810810810811,
663
+ "8784": 1.0641025641025643,
664
+ "8957": 0.5810810810810811,
665
+ "27773": 1.0641025641025643,
666
+ "27904": 0.5810810810810811,
667
+ "30749": 0.5641025641025643,
668
+ "30793": -0.4358974358974357,
669
+ "30810": 1.0810810810810811,
670
+ "31364": 0.833333333333333,
671
+ "33493": -3.166666666666667,
672
+ "34405": 0.5641025641025643,
673
+ "36535": 0.5810810810810811,
674
+ "40614": 0.08108108108108114,
675
+ "45501": 0.08108108108108114,
676
+ "46578": 1.0810810810810811,
677
+ "48082": 0.5810810810810811,
678
+ "48322": 0.08108108108108114,
679
+ "48385": 0.08108108108108114,
680
+ "48394": 0.33333333333333304,
681
+ "48516": 1.3333333333333335,
682
+ "58559": 0.3333333333333335,
683
+ "65261": 0.833333333333333,
684
+ "79132": 1.0833333333333333,
685
+ "122912": -1.4358974358974357,
686
+ "122916": -1.9358974358974357,
687
+ "122924": 1.3333333333333335,
688
+ "158813": -2.6666666666666665,
689
+ "160848": 0.33333333333333304,
690
+ "166528": -2.166666666666667,
691
+ "169034": 0.33333333333333304,
692
+ "170705": -0.4358974358974357,
693
+ "171765": -0.666666666666667,
694
+ "174053": 2.0641025641025643,
695
+ "175569": -1.4358974358974357,
696
+ "176371": -1.9358974358974357,
697
+ "177593": 2.0641025641025643,
698
+ "177765": -0.4358974358974357,
699
+ "180031": -0.666666666666667,
700
+ "183897": -1.4358974358974357,
701
+ "185029": -1.9358974358974357,
702
+ "187541": -1.9358974358974357,
703
+ "187593": -1.9358974358974357,
704
+ "189043": -0.4358974358974357,
705
+ "189111": 0.06410256410256432,
706
+ "190207": -1.4358974358974357,
707
+ "190209": 1.0641025641025643,
708
+ "190213": -1.9358974358974357,
709
+ "190215": -1.4358974358974357,
710
+ "190219": -1.9358974358974357,
711
+ "190221": -1.9358974358974357
712
+ }
713
+ }
714
+ }
data/processed/top_similar_items.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fe6573811e76b714860e29df2874b71e8ed224054197f4770f0323823765555
3
+ size 440321
data/processed/user_avg_ratings.json ADDED
@@ -0,0 +1,612 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "1": 4.366379310344827,
3
+ "2": 3.9482758620689653,
4
+ "3": 2.4358974358974357,
5
+ "4": 3.5555555555555554,
6
+ "5": 3.6363636363636362,
7
+ "6": 3.4936305732484074,
8
+ "7": 3.2302631578947367,
9
+ "8": 3.574468085106383,
10
+ "9": 3.260869565217391,
11
+ "10": 3.2785714285714285,
12
+ "11": 3.78125,
13
+ "12": 4.390625,
14
+ "13": 3.6451612903225805,
15
+ "14": 3.3958333333333335,
16
+ "15": 3.448148148148148,
17
+ "16": 3.7244897959183674,
18
+ "17": 4.20952380952381,
19
+ "18": 3.7320717131474104,
20
+ "19": 2.607396870554765,
21
+ "20": 3.590909090909091,
22
+ "21": 3.260722347629797,
23
+ "22": 2.5714285714285716,
24
+ "23": 3.6487603305785123,
25
+ "24": 3.65,
26
+ "25": 4.8076923076923075,
27
+ "26": 3.238095238095238,
28
+ "27": 3.5481481481481483,
29
+ "28": 3.020175438596491,
30
+ "29": 4.1419753086419755,
31
+ "30": 4.735294117647059,
32
+ "31": 3.92,
33
+ "32": 3.7549019607843137,
34
+ "33": 3.7884615384615383,
35
+ "34": 3.4186046511627906,
36
+ "35": 4.086956521739131,
37
+ "36": 2.6333333333333333,
38
+ "37": 4.142857142857143,
39
+ "38": 3.217948717948718,
40
+ "39": 4.0,
41
+ "40": 3.766990291262136,
42
+ "41": 3.2534562211981566,
43
+ "42": 3.565909090909091,
44
+ "43": 4.552631578947368,
45
+ "44": 3.3541666666666665,
46
+ "45": 3.8759398496240602,
47
+ "46": 4.0,
48
+ "47": 3.0535714285714284,
49
+ "48": 4.03030303030303,
50
+ "49": 4.261904761904762,
51
+ "50": 2.7806451612903227,
52
+ "51": 3.7757660167130918,
53
+ "52": 4.476923076923077,
54
+ "53": 5.0,
55
+ "54": 3.0303030303030303,
56
+ "55": 2.84,
57
+ "56": 3.8043478260869565,
58
+ "57": 3.392857142857143,
59
+ "58": 3.9017857142857144,
60
+ "59": 4.355140186915888,
61
+ "60": 3.727272727272727,
62
+ "61": 4.051282051282051,
63
+ "62": 4.081967213114754,
64
+ "63": 3.6309963099630997,
65
+ "64": 3.7688588007736943,
66
+ "65": 4.029411764705882,
67
+ "66": 4.0202898550724635,
68
+ "67": 3.9722222222222223,
69
+ "68": 3.233730158730159,
70
+ "69": 4.369565217391305,
71
+ "70": 4.32258064516129,
72
+ "71": 3.6,
73
+ "72": 4.155555555555556,
74
+ "73": 3.711904761904762,
75
+ "74": 4.271186440677966,
76
+ "75": 3.2318840579710146,
77
+ "76": 3.0840336134453783,
78
+ "77": 4.0,
79
+ "78": 3.1639344262295084,
80
+ "79": 4.203125,
81
+ "80": 4.2604790419161676,
82
+ "81": 2.769230769230769,
83
+ "82": 3.381057268722467,
84
+ "83": 3.309322033898305,
85
+ "84": 3.689419795221843,
86
+ "85": 3.7058823529411766,
87
+ "86": 3.9285714285714284,
88
+ "87": 3.9523809523809526,
89
+ "88": 4.035714285714286,
90
+ "89": 3.472972972972973,
91
+ "90": 4.074074074074074,
92
+ "91": 3.3956521739130436,
93
+ "92": 3.9375,
94
+ "93": 4.288659793814433,
95
+ "94": 3.0357142857142856,
96
+ "95": 4.0625,
97
+ "96": 3.8846153846153846,
98
+ "97": 4.194444444444445,
99
+ "98": 3.9130434782608696,
100
+ "99": 3.8679245283018866,
101
+ "100": 3.945945945945946,
102
+ "101": 3.557377049180328,
103
+ "102": 3.357142857142857,
104
+ "103": 3.907161803713528,
105
+ "104": 3.5073260073260073,
106
+ "105": 4.116343490304709,
107
+ "106": 4.4393939393939394,
108
+ "107": 3.911764705882353,
109
+ "108": 3.986842105263158,
110
+ "109": 3.220472440944882,
111
+ "110": 3.7254901960784315,
112
+ "111": 3.3397832817337463,
113
+ "112": 3.5923076923076924,
114
+ "113": 3.6466666666666665,
115
+ "114": 3.435483870967742,
116
+ "115": 3.767857142857143,
117
+ "116": 3.4367816091954024,
118
+ "117": 3.3393939393939394,
119
+ "118": 3.8181818181818183,
120
+ "119": 4.176744186046512,
121
+ "120": 3.409090909090909,
122
+ "121": 3.293103448275862,
123
+ "122": 4.546232876712328,
124
+ "123": 4.035714285714286,
125
+ "124": 3.99,
126
+ "125": 3.859722222222222,
127
+ "126": 3.289473684210526,
128
+ "127": 3.1136363636363638,
129
+ "128": 4.363636363636363,
130
+ "129": 3.9214285714285713,
131
+ "130": 3.5357142857142856,
132
+ "131": 3.4420289855072466,
133
+ "132": 3.0389048991354466,
134
+ "133": 3.0,
135
+ "134": 3.5714285714285716,
136
+ "135": 3.630824372759857,
137
+ "136": 3.315315315315315,
138
+ "137": 3.978723404255319,
139
+ "138": 3.522727272727273,
140
+ "139": 2.1443298969072164,
141
+ "140": 3.4991776315789473,
142
+ "141": 3.5386904761904763,
143
+ "142": 3.8157894736842106,
144
+ "143": 3.323943661971831,
145
+ "144": 3.62890625,
146
+ "145": 3.347826086956522,
147
+ "146": 3.140625,
148
+ "147": 3.375,
149
+ "148": 3.7395833333333335,
150
+ "149": 2.7241379310344827,
151
+ "150": 3.576923076923077,
152
+ "151": 3.542372881355932,
153
+ "152": 4.023809523809524,
154
+ "153": 2.217877094972067,
155
+ "154": 4.382352941176471,
156
+ "155": 3.608695652173913,
157
+ "156": 3.692211055276382,
158
+ "157": 3.4761904761904763,
159
+ "158": 3.423076923076923,
160
+ "159": 3.2731958762886597,
161
+ "160": 2.708237986270023,
162
+ "161": 3.8333333333333335,
163
+ "162": 4.2368421052631575,
164
+ "163": 3.0,
165
+ "164": 4.25,
166
+ "165": 3.5384615384615383,
167
+ "166": 4.073684210526316,
168
+ "167": 3.439306358381503,
169
+ "168": 4.462765957446808,
170
+ "169": 4.24907063197026,
171
+ "170": 3.34,
172
+ "171": 4.634146341463414,
173
+ "172": 3.826923076923077,
174
+ "173": 3.48,
175
+ "174": 3.656716417910448,
176
+ "175": 3.5416666666666665,
177
+ "176": 4.055555555555555,
178
+ "177": 3.375553097345133,
179
+ "178": 4.090909090909091,
180
+ "179": 3.782608695652174,
181
+ "180": 3.5625,
182
+ "181": 2.940677966101695,
183
+ "182": 3.5112589559877176,
184
+ "183": 3.824561403508772,
185
+ "184": 3.705223880597015,
186
+ "185": 3.5319148936170213,
187
+ "186": 4.079646017699115,
188
+ "187": 3.7751937984496124,
189
+ "188": 4.395833333333333,
190
+ "189": 4.1,
191
+ "190": 3.909090909090909,
192
+ "191": 3.7411764705882353,
193
+ "192": 4.045454545454546,
194
+ "193": 3.8,
195
+ "194": 3.475,
196
+ "195": 3.5294117647058822,
197
+ "196": 3.9375,
198
+ "197": 3.857142857142857,
199
+ "198": 3.491304347826087,
200
+ "199": 3.3953168044077136,
201
+ "200": 3.809880239520958,
202
+ "201": 4.1,
203
+ "202": 3.8188585607940446,
204
+ "203": 3.8444444444444446,
205
+ "204": 3.9578313253012047,
206
+ "205": 3.8703703703703702,
207
+ "206": 4.04,
208
+ "207": 2.875,
209
+ "208": 3.3076923076923075,
210
+ "209": 4.242857142857143,
211
+ "210": 4.079710144927536,
212
+ "211": 3.904494382022472,
213
+ "212": 3.590725806451613,
214
+ "213": 3.8273809523809526,
215
+ "214": 2.8636363636363638,
216
+ "215": 3.9081632653061225,
217
+ "216": 3.6646341463414633,
218
+ "217": 2.761827079934747,
219
+ "218": 3.3653846153846154,
220
+ "219": 3.165719696969697,
221
+ "220": 3.963768115942029,
222
+ "221": 4.126888217522659,
223
+ "222": 3.166,
224
+ "223": 3.2733333333333334,
225
+ "224": 4.2592592592592595,
226
+ "225": 3.6533333333333333,
227
+ "226": 3.4763313609467454,
228
+ "227": 4.202127659574468,
229
+ "228": 3.74,
230
+ "229": 3.7384615384615385,
231
+ "230": 2.8597122302158273,
232
+ "231": 3.8541666666666665,
233
+ "232": 3.2505800464037122,
234
+ "233": 3.3,
235
+ "234": 3.504950495049505,
236
+ "235": 3.646153846153846,
237
+ "236": 3.966666666666667,
238
+ "237": 3.36,
239
+ "238": 3.6818181818181817,
240
+ "239": 4.026881720430108,
241
+ "240": 3.9140625,
242
+ "241": 3.9802631578947367,
243
+ "242": 3.6,
244
+ "243": 4.138888888888889,
245
+ "244": 3.774193548387097,
246
+ "245": 2.7142857142857144,
247
+ "246": 4.252450980392157,
248
+ "247": 3.7466666666666666,
249
+ "248": 3.7450980392156863,
250
+ "249": 3.6964627151051626,
251
+ "250": 4.222222222222222,
252
+ "251": 4.869565217391305,
253
+ "252": 4.197368421052632,
254
+ "253": 4.351063829787234,
255
+ "254": 3.9803149606299213,
256
+ "255": 2.5681818181818183,
257
+ "256": 3.9454022988505746,
258
+ "257": 3.2,
259
+ "258": 4.2,
260
+ "259": 3.1206896551724137,
261
+ "260": 3.7598684210526314,
262
+ "261": 3.858490566037736,
263
+ "262": 3.1052631578947367,
264
+ "263": 3.716666666666667,
265
+ "264": 3.5625,
266
+ "265": 3.3496932515337425,
267
+ "266": 3.5,
268
+ "267": 4.175438596491228,
269
+ "268": 3.2713178294573644,
270
+ "269": 3.6206896551724137,
271
+ "270": 3.2,
272
+ "271": 3.186046511627907,
273
+ "272": 3.629032258064516,
274
+ "273": 4.0,
275
+ "274": 3.235884101040119,
276
+ "275": 4.086848635235732,
277
+ "276": 4.390243902439025,
278
+ "277": 3.5714285714285716,
279
+ "278": 3.875,
280
+ "279": 3.647727272727273,
281
+ "280": 3.9005102040816326,
282
+ "281": 3.2142857142857144,
283
+ "282": 4.033755274261603,
284
+ "283": 3.3142857142857145,
285
+ "284": 3.715909090909091,
286
+ "285": 3.9583333333333335,
287
+ "286": 3.7877358490566038,
288
+ "287": 2.6217105263157894,
289
+ "288": 3.1459715639810426,
290
+ "289": 3.625,
291
+ "290": 4.142322097378277,
292
+ "291": 4.258064516129032,
293
+ "292": 3.3015695067264574,
294
+ "293": 2.619047619047619,
295
+ "294": 2.610983981693364,
296
+ "295": 3.7439024390243905,
297
+ "296": 4.166666666666667,
298
+ "297": 2.5972222222222223,
299
+ "298": 2.363684771033014,
300
+ "299": 3.652173913043478,
301
+ "300": 4.296875,
302
+ "301": 3.3201754385964914,
303
+ "302": 4.0,
304
+ "303": 3.9056603773584904,
305
+ "304": 3.8703703703703702,
306
+ "305": 3.9246676514032495,
307
+ "306": 3.3169642857142856,
308
+ "307": 2.6656410256410257,
309
+ "308": 2.4260869565217393,
310
+ "309": 3.8398058252427183,
311
+ "310": 3.53125,
312
+ "311": 2.3392857142857144,
313
+ "312": 3.708520179372197,
314
+ "313": 3.4323529411764704,
315
+ "314": 3.046875,
316
+ "315": 3.3636363636363638,
317
+ "316": 3.0,
318
+ "317": 3.7301587301587302,
319
+ "318": 3.755972696245734,
320
+ "319": 4.428571428571429,
321
+ "320": 3.525,
322
+ "321": 3.5714285714285716,
323
+ "322": 3.411214953271028,
324
+ "323": 3.173469387755102,
325
+ "324": 3.142857142857143,
326
+ "325": 3.5444444444444443,
327
+ "326": 4.016447368421052,
328
+ "327": 4.104166666666667,
329
+ "328": 3.2254901960784315,
330
+ "329": 2.869565217391304,
331
+ "330": 3.6902834008097165,
332
+ "331": 3.6279761904761907,
333
+ "332": 3.599250936329588,
334
+ "333": 2.64,
335
+ "334": 3.418831168831169,
336
+ "335": 3.6785714285714284,
337
+ "336": 4.321428571428571,
338
+ "337": 4.207792207792208,
339
+ "338": 2.9358974358974357,
340
+ "339": 4.070080862533692,
341
+ "340": 4.214285714285714,
342
+ "341": 3.798076923076923,
343
+ "342": 2.9393939393939394,
344
+ "343": 4.059322033898305,
345
+ "344": 3.7681159420289854,
346
+ "345": 3.903225806451613,
347
+ "346": 3.682926829268293,
348
+ "347": 3.5555555555555554,
349
+ "348": 4.672727272727273,
350
+ "349": 3.72972972972973,
351
+ "350": 3.25,
352
+ "351": 3.6666666666666665,
353
+ "352": 3.7806122448979593,
354
+ "353": 3.488888888888889,
355
+ "354": 3.734513274336283,
356
+ "355": 3.8846153846153846,
357
+ "356": 4.02076124567474,
358
+ "357": 3.9569190600522193,
359
+ "358": 3.2560975609756095,
360
+ "359": 3.418918918918919,
361
+ "360": 3.36,
362
+ "361": 3.1804123711340204,
363
+ "362": 4.243119266055046,
364
+ "363": 3.3,
365
+ "364": 4.190476190476191,
366
+ "365": 2.7509025270758123,
367
+ "366": 3.9516129032258065,
368
+ "367": 3.8594594594594596,
369
+ "368": 2.8422174840085286,
370
+ "369": 3.391472868217054,
371
+ "370": 3.5483870967741935,
372
+ "371": 4.548780487804878,
373
+ "372": 3.3559322033898304,
374
+ "373": 3.19,
375
+ "374": 3.6363636363636362,
376
+ "375": 3.909090909090909,
377
+ "376": 4.06390977443609,
378
+ "377": 3.9122137404580153,
379
+ "378": 4.0,
380
+ "379": 3.388888888888889,
381
+ "380": 3.6732348111658455,
382
+ "381": 3.542194092827004,
383
+ "382": 3.508591065292096,
384
+ "383": 3.7941176470588234,
385
+ "384": 3.085714285714286,
386
+ "385": 3.4029850746268657,
387
+ "386": 2.75,
388
+ "387": 3.2585199610516065,
389
+ "388": 3.6206896551724137,
390
+ "389": 4.117647058823529,
391
+ "390": 3.8395061728395063,
392
+ "391": 3.715025906735751,
393
+ "392": 3.2,
394
+ "393": 3.841463414634146,
395
+ "394": 2.9565217391304346,
396
+ "395": 3.0491803278688523,
397
+ "396": 3.142857142857143,
398
+ "397": 3.9565217391304346,
399
+ "398": 4.25,
400
+ "399": 3.9444444444444446,
401
+ "400": 4.511627906976744,
402
+ "401": 3.5277777777777777,
403
+ "402": 3.8541666666666665,
404
+ "403": 3.764705882352941,
405
+ "404": 3.4,
406
+ "405": 4.008333333333334,
407
+ "406": 3.25,
408
+ "407": 4.113636363636363,
409
+ "408": 4.092198581560283,
410
+ "409": 3.8412698412698414,
411
+ "410": 4.059880239520958,
412
+ "411": 3.2598425196850394,
413
+ "412": 3.892156862745098,
414
+ "413": 4.410714285714286,
415
+ "414": 3.391957005189029,
416
+ "415": 4.085106382978723,
417
+ "416": 3.0686274509803924,
418
+ "417": 4.462686567164179,
419
+ "418": 3.639784946236559,
420
+ "419": 3.803225806451613,
421
+ "420": 3.8226950354609928,
422
+ "421": 4.108108108108108,
423
+ "422": 3.3225806451612905,
424
+ "423": 3.717391304347826,
425
+ "424": 3.6739130434782608,
426
+ "425": 3.5326797385620914,
427
+ "426": 3.6818181818181817,
428
+ "427": 3.1011904761904763,
429
+ "428": 2.64,
430
+ "429": 3.9827586206896552,
431
+ "430": 3.7413793103448274,
432
+ "431": 2.725,
433
+ "432": 3.646153846153846,
434
+ "433": 3.772727272727273,
435
+ "434": 3.781115879828326,
436
+ "435": 4.321428571428571,
437
+ "436": 3.1792452830188678,
438
+ "437": 3.80327868852459,
439
+ "438": 3.3551181102362206,
440
+ "439": 4.119047619047619,
441
+ "440": 3.893939393939394,
442
+ "441": 4.522222222222222,
443
+ "442": 1.275,
444
+ "443": 4.121621621621622,
445
+ "444": 3.8095238095238093,
446
+ "445": 3.857142857142857,
447
+ "446": 3.2142857142857144,
448
+ "447": 3.871794871794872,
449
+ "448": 2.8473712446351933,
450
+ "449": 3.289473684210526,
451
+ "450": 4.0,
452
+ "451": 3.7941176470588234,
453
+ "452": 4.556930693069307,
454
+ "453": 3.942122186495177,
455
+ "454": 3.7666666666666666,
456
+ "455": 3.456140350877193,
457
+ "456": 3.813953488372093,
458
+ "457": 3.46,
459
+ "458": 4.1525423728813555,
460
+ "459": 4.346153846153846,
461
+ "460": 4.201219512195122,
462
+ "461": 2.9444444444444446,
463
+ "462": 3.4065934065934065,
464
+ "463": 3.787878787878788,
465
+ "464": 3.753623188405797,
466
+ "465": 4.156521739130435,
467
+ "466": 3.9292035398230087,
468
+ "467": 3.409090909090909,
469
+ "468": 3.393939393939394,
470
+ "469": 3.6731182795698927,
471
+ "470": 3.5125,
472
+ "471": 3.875,
473
+ "472": 4.120689655172414,
474
+ "473": 3.418918918918919,
475
+ "474": 3.398956356736243,
476
+ "475": 4.409677419354839,
477
+ "476": 3.681159420289855,
478
+ "477": 3.736666666666667,
479
+ "478": 2.8181818181818183,
480
+ "479": 3.320441988950276,
481
+ "480": 3.27511961722488,
482
+ "481": 2.806451612903226,
483
+ "482": 3.40625,
484
+ "483": 3.618818681318681,
485
+ "484": 3.8145454545454545,
486
+ "485": 3.869565217391304,
487
+ "486": 4.017857142857143,
488
+ "487": 3.142857142857143,
489
+ "488": 3.8828828828828827,
490
+ "489": 3.017746913580247,
491
+ "490": 3.15929203539823,
492
+ "491": 4.15625,
493
+ "492": 3.8677685950413223,
494
+ "493": 3.6885245901639343,
495
+ "494": 4.2272727272727275,
496
+ "495": 4.009433962264151,
497
+ "496": 3.413793103448276,
498
+ "497": 3.5686274509803924,
499
+ "498": 4.057142857142857,
500
+ "499": 3.8333333333333335,
501
+ "500": 3.244186046511628,
502
+ "501": 3.2,
503
+ "502": 3.4285714285714284,
504
+ "503": 3.3421052631578947,
505
+ "504": 3.82183908045977,
506
+ "505": 4.17741935483871,
507
+ "506": 3.261904761904762,
508
+ "507": 3.380952380952381,
509
+ "508": 2.1458333333333335,
510
+ "509": 3.21627408993576,
511
+ "510": 2.9027777777777777,
512
+ "511": 4.125,
513
+ "512": 3.78,
514
+ "513": 3.890625,
515
+ "514": 3.3110831234256928,
516
+ "515": 4.846153846153846,
517
+ "516": 3.6923076923076925,
518
+ "517": 2.38625,
519
+ "518": 3.5416666666666665,
520
+ "519": 4.326923076923077,
521
+ "520": 3.881720430107527,
522
+ "521": 3.6,
523
+ "522": 3.83,
524
+ "523": 4.693333333333333,
525
+ "524": 3.4580152671755724,
526
+ "525": 3.542,
527
+ "526": 4.189655172413793,
528
+ "527": 4.119760479041916,
529
+ "528": 3.4726027397260273,
530
+ "529": 3.2,
531
+ "530": 3.7777777777777777,
532
+ "531": 3.772727272727273,
533
+ "532": 4.28,
534
+ "533": 4.381578947368421,
535
+ "534": 3.8048076923076923,
536
+ "535": 2.6666666666666665,
537
+ "536": 3.4,
538
+ "537": 4.0638297872340425,
539
+ "538": 4.472972972972973,
540
+ "539": 3.8026315789473686,
541
+ "540": 4.0,
542
+ "541": 3.3908045977011496,
543
+ "542": 3.5088495575221237,
544
+ "543": 4.453947368421052,
545
+ "544": 4.363636363636363,
546
+ "545": 3.369565217391304,
547
+ "546": 3.4375,
548
+ "547": 4.142857142857143,
549
+ "548": 4.096153846153846,
550
+ "549": 3.6666666666666665,
551
+ "550": 4.107142857142857,
552
+ "551": 3.7154471544715446,
553
+ "552": 3.11968085106383,
554
+ "553": 4.337349397590361,
555
+ "554": 4.0,
556
+ "555": 3.4550173010380623,
557
+ "556": 4.078125,
558
+ "557": 3.8518518518518516,
559
+ "558": 3.9285714285714284,
560
+ "559": 3.2857142857142856,
561
+ "560": 3.567685589519651,
562
+ "561": 3.372277227722772,
563
+ "562": 4.093073593073593,
564
+ "563": 3.300469483568075,
565
+ "564": 3.5796178343949046,
566
+ "565": 3.6551724137931036,
567
+ "566": 3.5405405405405403,
568
+ "567": 2.2454545454545456,
569
+ "568": 4.130434782608695,
570
+ "569": 4.0,
571
+ "570": 3.4162303664921465,
572
+ "571": 2.5714285714285716,
573
+ "572": 4.122580645161291,
574
+ "573": 4.212374581939799,
575
+ "574": 3.9565217391304346,
576
+ "575": 3.1666666666666665,
577
+ "576": 3.1,
578
+ "577": 3.5652173913043477,
579
+ "578": 3.962962962962963,
580
+ "579": 3.9726027397260273,
581
+ "580": 3.529816513761468,
582
+ "581": 4.375,
583
+ "582": 4.026785714285714,
584
+ "583": 3.294642857142857,
585
+ "584": 4.108433734939759,
586
+ "585": 4.336065573770492,
587
+ "586": 4.365384615384615,
588
+ "587": 3.9757575757575756,
589
+ "588": 3.25,
590
+ "589": 4.025,
591
+ "590": 3.355769230769231,
592
+ "591": 3.2777777777777777,
593
+ "592": 3.5851063829787235,
594
+ "593": 3.266990291262136,
595
+ "594": 3.9245689655172415,
596
+ "595": 4.2,
597
+ "596": 3.495133819951338,
598
+ "597": 3.9774266365688487,
599
+ "598": 3.8095238095238093,
600
+ "599": 2.6420500403551253,
601
+ "600": 2.991480996068152,
602
+ "601": 4.425742574257426,
603
+ "602": 3.3925925925925924,
604
+ "603": 3.5079533404029695,
605
+ "604": 3.48,
606
+ "605": 3.2104072398190047,
607
+ "606": 3.6573991031390136,
608
+ "607": 3.786096256684492,
609
+ "608": 3.1341756919374246,
610
+ "609": 3.27027027027027,
611
+ "610": 3.6885560675883258
612
+ }
data_prep.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # data_prep.py - COMPLETE ARCHETYPE EDITION
2
+ # Integrates: 20-rating threshold, Recency Bias Penalty, and Specialist Centroid Logic.
3
+
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ import json
8
+ import pickle
9
+ import os
10
+ import re
11
+ from tqdm import tqdm
12
+ import time
13
+ import litellm
14
+ from config import HF_TOKEN, LLM_MODEL_NAME
15
+
16
+ # --- CUSTOM JSON ENCODER ---
17
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to native Python equivalents.

    ``json`` cannot serialize NumPy scalars or arrays on its own. This
    encoder maps them to ``bool``/``int``/``float``/``list`` and defers to
    the base class (which raises ``TypeError``) for anything else.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: A value the standard encoder could not serialize.

        Returns:
            A native Python ``bool``, ``int``, ``float`` or ``list``.

        Raises:
            TypeError: If ``obj`` is not a supported NumPy type.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
24
+
25
+ # --- CONSTANTS & CONFIG ---
26
# Input and output locations, relative to the working directory.
MOVIELENS_DIR = 'data/ml-latest-small'
PROCESSED_DIR = 'data/processed'

# Minimum number of ratings a movie needs to pass the popularity filter.
MIN_RATINGS_THRESHOLD = 20
# Minimum number of ratings a user needs to be considered as a specialist.
MIN_USER_RATINGS = 20
# Pivot year for the recency-bias re-ranking.
DISCOVERY_ERA_YEAR = 1980

# Persona name -> genres used to find that persona's top 5 specialist users.
PERSONA_TARGETS = {
    "Action Junkie": [
        "Action",
        "Adventure",
        "Sci-Fi",
    ],
    "Romantic Dreamer": [
        "Romance",
    ],
    "Cinephile Critic": [
        "Drama",
        "Crime",
        "Mystery",
        "Thriller",
    ],
}

# Make sure the output directory exists before any artifact is written.
os.makedirs(PROCESSED_DIR, exist_ok=True)
40
+
41
+ # --- LLM HELPER ---
42
def call_llm_for_hook(prompt):
    """Ask the configured LLM (via LiteLLM) for a short movie hook.

    The call is best-effort: any failure is reported on stdout and a generic
    fallback sentence is returned, so one bad request does not abort the run.

    Args:
        prompt: Text sent to the model as a single user message.

    Returns:
        The model's reply with surrounding whitespace and double quotes
        removed, or the fallback sentence when the call fails or the reply
        is empty.
    """
    fallback = "A standout pick for your collection."
    try:
        print(f"DEBUG: Calling LLM for hook with model: {LLM_MODEL_NAME}")
        start_ts = time.time()
        response = litellm.completion(
            model=LLM_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            api_key=HF_TOKEN,
            max_tokens=40,
            temperature=0.8
        )
        print(f"DEBUG: Full LLM Response Object: {response}")
        print(f"DEBUG: LLM call completed in {time.time() - start_ts:.4f}s")
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberately broad: this is the boundary to a remote service.
        # Report the cause so a bad token or model name is not mistaken for
        # a run in which every hook legitimately equals the fallback.
        print(f"WARNING: LLM hook generation failed ({type(e).__name__}: {e}); using fallback hook.")
        return fallback

    # A missing or blank reply would otherwise be cached as an empty hook.
    hook = content.strip().replace('"', '') if content else ''
    return hook or fallback
59
+
60
def extract_year(title):
    """Return the release year written as ``(YYYY)`` in a movie title.

    When a title contains several parenthesized four-digit groups, the last
    one is used, because the year is the trailing group.

    Args:
        title: Movie title, e.g. ``"Toy Story (1995)"``. Non-string values
            (such as NaN for a missing title) are accepted.

    Returns:
        The year as an ``int``, or ``0`` when no ``(YYYY)`` group is present
        or ``title`` is not a string.
    """
    if not isinstance(title, str):
        return 0
    years = re.findall(r'\((\d{4})\)', title)
    return int(years[-1]) if years else 0
64
+
65
+ # --- CORE PREPARATION ---
66
def _build_movie_metadata(movies, popular_ids):
    """Return ``{str(movieId): {movie_title, genres, year}}`` for popular movies."""
    kept = movies[movies['movieId'].isin(popular_ids)]
    return {
        str(movie_id): {
            'movie_title': title,
            'genres': genres.split('|'),
            'year': int(year),  # plain int keeps the value JSON-serializable
        }
        for movie_id, title, genres, year in zip(
            kept['movieId'], kept['title'], kept['genres'], kept['year']
        )
    }


def _find_persona_archetypes(movies, ratings_normalized):
    """Pick the top 5 specialist users per persona and average their ratings."""
    # The activity filter does not depend on the persona, so compute it once.
    ratings_per_user = ratings_normalized.groupby('userId').size()
    active_totals = ratings_per_user[ratings_per_user >= MIN_USER_RATINGS]
    active_ratings = ratings_normalized[
        ratings_normalized['userId'].isin(active_totals.index)
    ]

    persona_archetypes = {}
    for persona, target_genres in PERSONA_TARGETS.items():
        # Escape each genre so it is matched literally inside the alternation.
        genre_pattern = '|'.join(re.escape(g) for g in target_genres)
        genre_movie_ids = movies.loc[
            movies['genres'].str.contains(genre_pattern), 'movieId'
        ]

        # Specialization(u) = Genre Density x Genre Passion, where
        #   Genre Density = ratings in target genres / total ratings by user
        #   Genre Passion = mean NORMALIZED rating within the target genres
        # Users with no rating in the target genres do not appear in the
        # grouped result, so they are skipped implicitly.
        genre_stats = (
            active_ratings[active_ratings['movieId'].isin(genre_movie_ids)]
            .groupby('userId')['rating']
            .agg(['count', 'mean'])
        )
        genre_density = genre_stats['count'] / active_totals.reindex(genre_stats.index)
        specialization = genre_density * genre_stats['mean']

        # sorted() is stable, so equal scores keep ascending userId order.
        ranked = sorted(specialization.items(), key=lambda item: item[1], reverse=True)
        top_5_specialists = [user_id for user_id, _ in ranked[:5]]
        print(f" - Found specialists for {persona}: {top_5_specialists}")

        # Centroid: mean NORMALIZED rating per movie across the specialists.
        centroid_ratings = ratings_normalized[
            ratings_normalized['userId'].isin(top_5_specialists)
        ]
        aggregated_history = centroid_ratings.groupby('movieId')['rating'].mean().to_dict()

        persona_archetypes[persona] = {
            "specialist_ids": top_5_specialists,
            "target_genres": target_genres,
            "consolidated_history": aggregated_history
        }
    return persona_archetypes


def _top_neighbor_indices(sim_matrix, width):
    """Return, per row, the column indices of the ``width`` largest values.

    Indices within a row are ordered by ascending similarity, so the last
    ``k`` entries of a row are that row's top-``k`` neighbours.
    """
    n_items = sim_matrix.shape[0]
    top = np.empty((n_items, width), dtype=np.intp)
    for row_idx, sim_row in enumerate(sim_matrix):
        # Explicit start index: a ``[-width:]`` slice would return the whole
        # row when ``width`` is 0.
        top[row_idx] = np.argsort(sim_row)[sim_row.shape[0] - width:]
    return top


def prepare_data():
    """Build and persist every artifact derived from the raw MovieLens files.

    Reads ``ratings.csv`` and ``movies.csv`` from ``MOVIELENS_DIR`` and then:

    1. keeps metadata for movies with at least ``MIN_RATINGS_THRESHOLD`` ratings;
    2. mean-centres each user's ratings to remove per-user rating bias;
    3. selects the top 5 "specialist" users per persona and averages their
       normalized ratings into a centroid history;
    4. computes item-item cosine similarity, picks the smallest candidate K
       whose average genre coverage is at least 1.0, and stores each popular
       movie's top-K neighbours.

    Writes ``movie_metadata.json``, ``persona_archetypes.json``,
    ``user_avg_ratings.json`` and ``top_similar_items.pkl`` to ``PROCESSED_DIR``.

    Returns:
        A tuple ``(persona_archetypes, movie_meta, pivot)`` where ``pivot`` is
        the user x movie matrix of normalized ratings, with 0 for unrated.
    """
    print("Step 1: Loading raw MovieLens data...")
    ratings = pd.read_csv(f'{MOVIELENS_DIR}/ratings.csv')
    movies = pd.read_csv(f'{MOVIELENS_DIR}/movies.csv')

    # Step 2: metadata and popularity filter.
    print("Step 2: Processing metadata and applying thresholds...")
    movies['year'] = movies['title'].apply(extract_year)  # used for recency bias
    rating_counts = ratings.groupby('movieId').size()
    popular_ids = rating_counts[rating_counts >= MIN_RATINGS_THRESHOLD].index
    movie_meta = _build_movie_metadata(movies, popular_ids)

    # === RATING NORMALIZATION (User Bias Correction) ===
    # Users rate on different personal scales (generous vs. strict), so each
    # rating is replaced by its deviation from that user's own mean:
    #   normalized_rating = rating - user_avg_rating
    print(" - Normalizing ratings to remove user bias...")
    user_avg_ratings = ratings.groupby('userId')['rating'].mean().to_dict()
    ratings_normalized = ratings[['userId', 'movieId']].copy()
    # transform('mean') broadcasts each user's mean back onto their rows,
    # avoiding a Python-level callback per rating.
    ratings_normalized['rating'] = (
        ratings['rating'] - ratings.groupby('userId')['rating'].transform('mean')
    )
    print(" - Normalization complete. All subsequent steps use deviation-adjusted ratings.\n")

    # Step 3: specialist persona archetypes (the centroid logic).
    print("Step 3: Identifying Specialist Centroids (Top 5 users per persona)...")
    step_start = time.time()
    persona_archetypes = _find_persona_archetypes(movies, ratings_normalized)
    print(f">>> Step 3 (Specialist Centroids) complete in {time.time() - step_start:.2f}s")

    # Step 4: item-item similarities consumed by app.py.
    print("Step 4: Pre-computing Item-Item Similarities (O(1) Lookups)...")
    step_start = time.time()
    pivot = ratings_normalized.pivot(index='userId', columns='movieId', values='rating').fillna(0)
    item_sim_matrix = cosine_similarity(pivot.T)
    # Exclude each item from its own neighbour list explicitly. Dropping the
    # last argsort position is only correct when the item is the unique
    # maximum, which fails for ties (e.g. two movies rated by the same single
    # user) and for all-zero columns.
    np.fill_diagonal(item_sim_matrix, -np.inf)

    m_ids = pivot.columns.tolist()
    n_items = len(m_ids)

    # === STEP 4A: Genre Coverage Analysis for Adaptive K Selection ===
    # Too small a K can starve genre-filtered recommendations; too large a K
    # wastes storage. Measure, for each candidate K, how many of a movie's
    # top-K neighbours fall in each genre on average, then keep the smallest
    # K that reaches the target for every genre.
    print("Analyzing genre coverage for different K values...")
    K_CANDIDATES = [20, 30, 50, 100]
    optimal_k = 50  # Default fallback
    # Sort every row once for the widest K; narrower K values reuse the tail.
    width = max(0, min(max(K_CANDIDATES + [optimal_k]), n_items - 1))
    top_neighbors = _top_neighbor_indices(item_sim_matrix, width)

    m_id_array = np.asarray(m_ids)
    genre_coverage_analysis = {}
    for genre in ["Action", "Drama", "Romance", "Comedy", "Sci-Fi", "Thriller"]:
        genre_movie_ids = movies.loc[
            movies['genres'].str.contains(genre, regex=False), 'movieId'
        ]
        is_genre = np.isin(m_id_array, genre_movie_ids.to_numpy())
        genre_coverage_analysis[genre] = {
            k: int(is_genre[top_neighbors[:, width - min(k, width):]].sum()) / n_items
            for k in K_CANDIDATES
        }

    print("Avg similar items per movie in each genre:")
    for genre, coverage_dict in genre_coverage_analysis.items():
        print(f" {genre}: {coverage_dict}")

    # Adaptively select K: the smallest K giving >= 1.0 average items per genre.
    TARGET_MIN_COVERAGE = 1.0
    for k in sorted(K_CANDIDATES):
        min_coverage = min(coverage[k] for coverage in genre_coverage_analysis.values())
        if min_coverage >= TARGET_MIN_COVERAGE:
            optimal_k = k
            print(f"\n✅ Optimal K selected: {optimal_k} (min genre coverage: {min_coverage:.2f})")
            break

    print(f"Using K={optimal_k} for top similar items\n")

    # Store the top-K neighbours (self excluded) for every popular movie.
    k_eff = min(optimal_k, width)
    top_sim_dict = {}
    for i, m_id in enumerate(tqdm(m_ids, desc="Similarities")):
        if str(m_id) not in movie_meta:
            continue
        sim_scores = item_sim_matrix[i]
        top_sim_dict[str(m_id)] = {
            str(m_ids[idx]): float(sim_scores[idx])
            for idx in top_neighbors[i, width - k_eff:]
        }

    print(f">>> Step 4 (Item-Item Similarities with K={optimal_k}) complete in {time.time() - step_start:.2f}s\n")

    # Save components
    with open(f'{PROCESSED_DIR}/movie_metadata.json', 'w') as f:
        json.dump(movie_meta, f, indent=4)
    with open(f'{PROCESSED_DIR}/persona_archetypes.json', 'w') as f:
        json.dump(persona_archetypes, f, cls=NumpyEncoder, indent=4)
    with open(f'{PROCESSED_DIR}/user_avg_ratings.json', 'w') as f:
        json.dump(user_avg_ratings, f, indent=4)
    with open(f'{PROCESSED_DIR}/top_similar_items.pkl', 'wb') as f:
        pickle.dump(top_sim_dict, f)

    return persona_archetypes, movie_meta, pivot
237
+
238
+ # --- RECOMMENDATION ENGINE ---
239
def _recency_factor(year):
    """Return the Layer 4b age multiplier for a release year.

    * Unknown year (``0``, the sentinel from ``extract_year``): neutral 1.0,
      so a title without a parsable year is not treated as an ancient film.
    * Before ``DISCOVERY_ERA_YEAR``: ``1.0 - age / 120``, floored at 0.5
      (about 0.99 for 1979; the floor is reached 60 years before the pivot).
    * From ``DISCOVERY_ERA_YEAR`` on: ``1.0 + age / 100``, capped at 1.3
      (the cap is reached 30 years after the pivot).
    """
    if year <= 0:
        return 1.0
    if year < DISCOVERY_ERA_YEAR:
        return max(0.5, 1.0 - (DISCOVERY_ERA_YEAR - year) / 120)
    return min(1.3, 1.0 + (year - DISCOVERY_ERA_YEAR) / 100)


def compute_home_recommendations(persona_archetypes, movie_meta, pivot,
                                 n_neighbors=50, top_n=6):
    """Compute and persist the home-screen recommendations for each persona.

    For each persona, the centroid history is turned into a rating vector,
    the ``n_neighbors`` most similar users are found, their ratings are
    combined into a similarity-weighted score per movie, and the scores are
    re-ranked by genre affinity and recency. The result is written to
    ``home_recommendations.json`` in ``PROCESSED_DIR``.

    Args:
        persona_archetypes: Mapping persona -> dict with ``target_genres``
            and ``consolidated_history`` (movieId -> normalized rating).
        movie_meta: Mapping ``str(movieId)`` -> dict with ``movie_title``,
            ``genres`` and ``year``.
        pivot: User x movie DataFrame of normalized ratings.
        n_neighbors: Number of most similar users to aggregate.
        top_n: Number of recommendations kept per persona.

    Returns:
        Mapping persona -> list of up to ``top_n`` dicts with ``movie_id``,
        ``movie_title``, ``genres`` and ``score``, best first.
    """
    print("Step 5: Computing Layer 4 Home Recs with Recency Bias...")
    home_recs = {}

    for persona, data in persona_archetypes.items():
        history = data['consolidated_history']
        target_genres = data['target_genres']

        print(f" - Building centroid and finding neighborhood for {persona}...")
        # Centroid vector over the pivot's movie columns; unrated movies are 0.
        centroid_vec = pd.Series(history, dtype=float).reindex(pivot.columns, fill_value=0.0)

        # Neighborhood search: users most similar to the centroid.
        user_sims = cosine_similarity(centroid_vec.to_numpy().reshape(1, -1), pivot)[0]
        neighbor_idx = np.argsort(user_sims)[-n_neighbors:]
        neighbor_sims = user_sims[neighbor_idx]
        neighbor_ratings = pivot.iloc[neighbor_idx]

        # Layer 3: R_{p,m} = sum(similarity * rating) / sum(|similarity|).
        # The absolute value keeps the normaliser positive even if a negative
        # similarity ends up among the selected neighbours.
        numerator = neighbor_ratings.multiply(neighbor_sims, axis=0).sum()
        denominator = np.abs(neighbor_sims).sum()
        candidates = numerator / denominator if denominator > 0 else numerator

        print(f" - Applying Layer 4 re-ranking for {persona}...")
        final_list = []
        for m_id, raw_score in candidates.items():
            m_id_str = str(m_id)
            if m_id_str not in movie_meta:
                continue
            if m_id in history:
                continue  # Don't recommend what the specialists already rated

            meta = movie_meta[m_id_str]

            # Layer 4a: Genre Affinity Boost
            genre_match = any(g in target_genres for g in meta['genres'])
            genre_multiplier = 2.5 if genre_match else 0.4

            # Layer 4b: Recency Bias
            age_factor = _recency_factor(meta['year'])

            final_list.append({
                'movie_id': int(m_id),  # native types keep json.dump happy
                'movie_title': meta['movie_title'],
                'genres': meta['genres'],
                'score': float(raw_score * genre_multiplier * age_factor)
            })

        # Best ``top_n`` for the home screen.
        home_recs[persona] = sorted(final_list, key=lambda x: x['score'], reverse=True)[:top_n]

        top_movies_str = ", ".join([f"'{r['movie_title']}'" for r in home_recs[persona][:2]])
        print(f" - Top recs for {persona}: {top_movies_str}...")

    with open(f'{PROCESSED_DIR}/home_recommendations.json', 'w') as f:
        json.dump(home_recs, f, indent=4)
    return home_recs
305
+
306
def generate_hooks(home_recs):
    """Generate one LLM hook per recommended movie and cache them on disk.

    Args:
        home_recs: Mapping persona -> list of recommendation dicts, each
            providing ``movie_title`` and ``movie_id``.

    Writes ``cached_hooks.json`` to ``PROCESSED_DIR`` as
    ``{persona: {str(movie_id): hook}}``.
    """
    print("Step 6: Generating LLM Hooks for Discovery Feed...")
    cached_hooks = {}
    for persona, recs in home_recs.items():
        persona_hooks = cached_hooks.setdefault(persona, {})
        for rec in recs:
            prompt = f"Generate a 5-10 word snappy, atmospheric hook for the movie: {rec['movie_title']}."
            persona_hooks[str(rec['movie_id'])] = call_llm_for_hook(prompt)
            # Pause between calls to avoid HF Backend Error 40001.
            time.sleep(0.5)

    hooks_path = f'{PROCESSED_DIR}/cached_hooks.json'
    with open(hooks_path, 'w') as f:
        json.dump(cached_hooks, f, indent=4)
320
+
321
if __name__ == "__main__":
    # Run the three pipeline phases in order, timing each and the whole run.
    pipeline_t0 = time.time()

    # Phase 1: load, clean and pre-compute the shared artifacts.
    phase_t0 = time.time()
    persona_data, movie_lookup, rating_pivot = prepare_data()
    print(f">>> Phase 1 (Data Prep) complete in {time.time() - phase_t0:.2f}s\n")

    # Phase 2: home recommendations from the persona centroids.
    phase_t0 = time.time()
    home_feed = compute_home_recommendations(persona_data, movie_lookup, rating_pivot)
    print(f">>> Phase 2 (Home Recs) complete in {time.time() - phase_t0:.2f}s\n")

    # Phase 3: LLM hooks shown in the UI.
    phase_t0 = time.time()
    generate_hooks(home_feed)
    print(f">>> Phase 3 (LLM Hooks) complete in {time.time() - phase_t0:.2f}s\n")

    print(f"✅ SUCCESS: Full data pipeline complete in {time.time() - pipeline_t0:.2f} seconds.")
home-page-recos.jpg ADDED

Git LFS Details

  • SHA256: 29f5feb8ea2a78e14e15d9154c7011d951be8a68a99baf92d7a5ab6563fd933e
  • Pointer size: 131 Bytes
  • Size of remote file: 636 kB
movielens_analysis.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
+ import numpy as np
5
+
6
def _save_and_show(filename):
    """Tighten the layout, write the current figure to ``filename`` and show it."""
    plt.tight_layout()
    plt.savefig(filename)
    plt.show()


# Load the datasets
try:
    movies_df = pd.read_csv('data/ml-latest-small/movies.csv')
    ratings_df = pd.read_csv('data/ml-latest-small/ratings.csv')
except FileNotFoundError:
    # SystemExit with a message exits with a non-zero status; the `exit()`
    # helper comes from the `site` module and reported success here.
    raise SystemExit("Ensure 'movies.csv' and 'ratings.csv' are in the 'data/ml-latest-small/' directory.")

print("--- Movies DataFrame Info ---")
movies_df.info()
print("\n--- Ratings DataFrame Info ---")
ratings_df.info()

# Merge ratings with movies so every rating carries its title and genres.
movie_ratings = pd.merge(ratings_df, movies_df, on='movieId')

print("\n--- Merged DataFrame Info ---")
movie_ratings.info()

print("\n--- First 5 rows of Merged DataFrame ---")
print(movie_ratings.head())

# --- Analysis 1: Most Rated Movies ---
# Number of ratings per movie title.
movie_counts = movie_ratings['title'].value_counts()
print("\n--- Top 10 Most Rated Movies ---")
print(movie_counts.head(10))

# Visualize Most Rated Movies
top_counts = movie_counts.head(10)
plt.figure(figsize=(12, 6))
sns.barplot(y=top_counts.index, x=top_counts.values, palette='viridis')
plt.title('Top 10 Most Rated Movies')
plt.xlabel('Number of Ratings')
plt.ylabel('Movie Title')
_save_and_show('most_rated_movies.png')

# --- Analysis 2: Highest Rated Movies (with a minimum number of ratings) ---
# Average rating per movie title.
average_ratings = movie_ratings.groupby('title')['rating'].mean()

# Combine with rating counts (both series are indexed by title).
movie_popularity = pd.DataFrame({
    'rating_count': movie_counts,
    'average_rating': average_ratings
})

# Drop rarely rated movies so a handful of votes cannot top the ranking.
min_ratings = 50
popular_movies = movie_popularity[movie_popularity['rating_count'] >= min_ratings]

# Sort by average rating
highest_rated_movies = popular_movies.sort_values(by='average_rating', ascending=False)
print(f"\n--- Top 10 Highest Rated Movies (with at least {min_ratings} ratings) ---")
print(highest_rated_movies.head(10))

# Visualize Highest Rated Movies
top_rated = highest_rated_movies.head(10)
plt.figure(figsize=(12, 6))
sns.barplot(y=top_rated.index, x=top_rated['average_rating'], palette='magma')
plt.title(f'Top 10 Highest Rated Movies (at least {min_ratings} ratings)')
plt.xlabel('Average Rating')
plt.ylabel('Movie Title')
_save_and_show('highest_rated_movies.png')

# --- Analysis 3: Genre Distribution ---
# Split the pipe-separated genres and expand them into one row per genre.
genres_split = movies_df['genres'].str.split('|', expand=True)
genres_long = genres_split.stack().reset_index(level=1, drop=True).to_frame(name='genre')

# Count occurrences of each genre
genre_counts = genres_long['genre'].value_counts()
print("\n--- Top 10 Most Common Genres ---")
print(genre_counts.head(10))

# Visualize Genre Distribution
plt.figure(figsize=(12, 7))
sns.barplot(y=genre_counts.index, x=genre_counts.values, palette='cubehelix')
plt.title('Distribution of Movie Genres')
plt.xlabel('Number of Movies')
plt.ylabel('Genre')
_save_and_show('genre_distribution.png')

# --- Analysis 4: User Rating Behavior (Distribution of all ratings) ---
# Bin edges sit halfway between the half-star values (0.25, 0.75, ..., 5.25)
# so every rating gets its own bin. Edges placed on the ratings themselves
# put 4.5 and 5.0 in the same final bin.
rating_bin_edges = np.arange(0.25, 5.5, 0.5)
plt.figure(figsize=(10, 6))
sns.histplot(ratings_df['rating'], bins=rating_bin_edges, kde=True, stat='density')
plt.title('Distribution of All Ratings')
plt.xlabel('Rating')
plt.ylabel('Density')
plt.xticks(np.arange(0.5, 5.5, 0.5))
plt.grid(axis='y', alpha=0.75)
_save_and_show('rating_distribution.png')

# --- Analysis 5: Correlation between number of ratings and average rating ---
plt.figure(figsize=(10, 8))
sns.scatterplot(x='rating_count', y='average_rating', data=movie_popularity, alpha=0.6)
plt.title('Correlation between Number of Ratings and Average Rating')
plt.xlabel('Number of Ratings')
plt.ylabel('Average Rating')
plt.grid(True, linestyle='--', alpha=0.6)
_save_and_show('rating_count_vs_avg_rating.png')

# --- Analysis 6: Distribution of Movies by Number of Ratings ---
plt.figure(figsize=(12, 6))
sns.histplot(movie_counts, bins=50, kde=False)  # Use movie_counts directly
plt.title('Distribution of Movies by Number of Ratings')
plt.xlabel('Number of Ratings Received by Movie')
plt.ylabel('Number of Movies')
plt.grid(axis='y', alpha=0.75)
plt.yscale('log')  # Log scale keeps the sparsely populated bins visible
_save_and_show('movie_rating_count_distribution.png')

# --- Analysis 7: Distribution of Users by Number of Ratings ---
user_rating_counts = ratings_df.groupby('userId').size()
plt.figure(figsize=(12, 6))
sns.histplot(user_rating_counts, bins=50, kde=False)
plt.title('Distribution of Users by Number of Movies Rated')
plt.xlabel('Number of Movies Rated by User')
plt.ylabel('Number of Users')
plt.grid(axis='y', alpha=0.75)
plt.yscale('log')  # Log scale keeps the sparsely populated bins visible
_save_and_show('user_rating_count_distribution.png')

# --- Analysis 8: List Users who Rated More Than 1000 Movies ---
users_high_ratings = user_rating_counts[user_rating_counts > 1000]
print(f"\n--- Users who rated more than 1000 movies (Total: {len(users_high_ratings)}) ---")
if not users_high_ratings.empty:
    for user_id, count in users_high_ratings.items():
        print(f"User ID: {user_id}, Ratings: {count}")
else:
    print("No users found who rated more than 1000 movies.")


print("\nAnalysis complete. Visualizations saved as PNG files.")
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ gradio
5
+ requests
6
+ tqdm
7
+ huggingface_hub
8
+ python-dotenv
9
+ #scikit-surprise
10
+ litellm
11
+ matplotlib
12
+ seaborn