Commit ·
c6769c2
1
Parent(s): 22e5dbc
Clean deploy without venv
Browse files- .gitattributes +2 -0
- .gitignore +35 -0
- .vscode/settings.json +4 -0
- Dockerfile +25 -0
- app.py +164 -0
- eda/.ipynb_checkpoints/Untitled-checkpoint.ipynb +6 -0
- eda/Untitled.ipynb +1251 -0
- main.py +28 -0
- models/metadata.pkl +3 -0
- models/movie_index.faiss +3 -0
- requirements.txt +5 -0
- src/__init__.py +0 -0
- src/__pycache__/preprocessing.cpython-311.pyc +0 -0
- src/__pycache__/preprocessing.cpython-312.pyc +0 -0
- src/__pycache__/recommender.cpython-311.pyc +0 -0
- src/ingest.py +121 -0
- src/preprocessing.py +56 -0
- src/recommender.py +156 -0
- test.py +19 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.faiss filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
.venv/
|
| 8 |
+
venv/
|
| 9 |
+
env/
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
data/
|
| 13 |
+
*.csv
|
| 14 |
+
*.json
|
| 15 |
+
|
| 16 |
+
eda/
|
| 17 |
+
*.ipynb
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
models/
|
| 22 |
+
*.faiss
|
| 23 |
+
*.pkl
|
| 24 |
+
*.bin
|
| 25 |
+
*.pt
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
.env
|
| 29 |
+
|
| 30 |
+
.vscode/
|
| 31 |
+
.idea/
|
| 32 |
+
|
| 33 |
+
.DS_Store
|
| 34 |
+
Thumbs.db
.venv/
|
| 35 |
+
data/
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python-envs.pythonProjects": [],
|
| 3 |
+
"python-envs.defaultEnvManager": "ms-python.python:venv"
|
| 4 |
+
}
|
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use the slim Python 3.9 image to keep the final image small
FROM python:3.9-slim

# Hugging Face Spaces runs the container as UID 1000, so create a matching
# user up front; files left owned by root would not be writable at runtime.
RUN useradd --create-home --uid 1000 user

# Set working directory
WORKDIR /app

# Copy the dependency list first so the install layer is cached across code changes
COPY requirements.txt .

# Install dependencies
# We add --no-cache-dir to keep the image small
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application, owned by the runtime user
COPY --chown=user:user . .

# Create the directory for models if it doesn't exist and make it (and /app)
# writable by the runtime user.
# NOTE(review): presumably the background update task writes into models/ — confirm.
RUN mkdir -p models && chown user:user /app models

# Drop root privileges for the running server
USER user

# Expose port 7860 (Hugging Face default)
EXPOSE 7860

# Command to run the app
# Note: Hugging Face expects the app on port 7860, not 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
import os
|
| 6 |
+
import uvicorn
|
| 7 |
+
|
| 8 |
+
# Import our custom modules
|
| 9 |
+
from src.recommender import MovieRecommender
|
| 10 |
+
from src.ingest import run_weekly_update
|
| 11 |
+
|
| 12 |
+
# --- Configuration ---
|
| 13 |
+
app = FastAPI(
    title="CineMatch API",
    description="A content-based movie recommender using FAISS & Transformers.",
    version="1.0.0",
)

# Enable CORS (Allows your future mobile app/website to talk to this API)
# Credentials stay disabled while every origin is allowed: FastAPI's CORS docs
# require explicit origins (no "*") whenever allow_credentials=True, and nothing
# in this file relies on cookies or other credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific domain
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global State for the AI Model
# Stays None until the startup hook creates the recommender.
rec_engine: Optional[MovieRecommender] = None
|
| 30 |
+
|
| 31 |
+
# --- Lifespan Events (Startup/Shutdown) ---
|
| 32 |
+
@app.on_event("startup")
async def startup_event():
    """Create the recommender at server start and load the saved index if one exists."""
    global rec_engine
    rec_engine = MovieRecommender()

    index_on_disk = os.path.exists('models/movie_index.faiss')
    if not index_on_disk:
        # The server still starts; the recommender just has nothing loaded yet.
        print(" [WARNING] No model found at 'models/'. API will return errors until model is built.")
        return

    print(" [INFO] Loading AI Model from disk...")
    rec_engine.load('models/')
    print(f" [INFO] Model loaded. Index contains {rec_engine.index.ntotal} movies.")
|
| 46 |
+
|
| 47 |
+
# --- Pydantic Data Models (Schema) ---
|
| 48 |
+
# Request body for POST /search.
class SearchRequest(BaseModel):
    # Free-text query handed to the recommender.
    query: str
    # Number of results requested.
    k: int = 10
|
| 51 |
+
|
| 52 |
+
# Request body for POST /recommend/vibe.
class VibeRequest(BaseModel):
    # e.g., ["Sci-Fi", "90s"]
    tags: List[str] = []
    # e.g., "Robots fighting in space"
    description: str = ""
    # Number of results requested.
    k: int = 10
|
| 56 |
+
|
| 57 |
+
# Request body for POST /recommend/user.
class UserHistoryRequest(BaseModel):
    # e.g., ["The Matrix", "Inception"]
    liked_movies: List[str]
    # Number of results requested.
    k: int = 10
|
| 60 |
+
|
| 61 |
+
# One recommended movie as returned by the list-returning endpoints.
class MovieResponse(BaseModel):
    movie_id: int
    title: str
    # NOTE(review): scale and direction of the score are set by the recommender — confirm there.
    score: float
|
| 65 |
+
|
| 66 |
+
# --- Helper Function ---
|
| 67 |
+
def check_model():
    """Raise HTTP 503 unless a recommender with a truthy index is available."""
    # Short-circuits on a missing engine, so .index is only read when it exists.
    engine_ready = bool(rec_engine) and bool(rec_engine.index)
    if engine_ready:
        return
    raise HTTPException(status_code=503, detail="AI Model is not loaded. Run ingestion first.")
|
| 70 |
+
|
| 71 |
+
# --- API Endpoints ---
|
| 72 |
+
|
| 73 |
+
@app.get("/")
def health_check():
    """Simple check to see if server is running."""
    # The engine object is created at startup even when no index file exists, so
    # "loaded" has to mean an index is attached, not merely that the object exists.
    # getattr keeps this endpoint from failing if the engine has no index attribute.
    model_loaded = bool(getattr(rec_engine, "index", None))
    return {"status": "online and active!!!", "model_loaded": model_loaded}
|
| 77 |
+
|
| 78 |
+
@app.post("/search", response_model=List[MovieResponse])
def search_movies(request: SearchRequest):
    """
    Semantic Search: Convert query to vector -> Find nearest movies.
    """
    # Reject the request with 503 before touching the engine if nothing is loaded.
    check_model()
    return rec_engine.recommend_on_text(request.query, k=request.k)
|
| 86 |
+
|
| 87 |
+
@app.post("/recommend/vibe", response_model=dict)
def vibe_check(request: VibeRequest):
    """
    Recommends based on a mix of Tags and Description.
    """
    check_model()

    # Construct the "Soup"
    # We repeat tags to give them more weight in the vector space.
    # The list is doubled *before* joining so the copies stay space-separated;
    # doubling the joined string would glue the last tag onto the first
    # (e.g. "Sci-Fi 90sSci-Fi 90s").
    tag_str = " ".join(request.tags * 2)
    query_soup = f"{tag_str} {request.description}".strip()

    # Nothing to search for when both the tags and the description are empty.
    if not query_soup:
        raise HTTPException(status_code=400, detail="Please provide at least one tag or description.")

    results = rec_engine.recommend_on_text(query_soup, k=request.k)

    # Echo the constructed query so clients can see what was actually searched.
    return {
        "interpreted_query": query_soup,
        "results": results,
    }
|
| 108 |
+
|
| 109 |
+
@app.post("/recommend/user", response_model=List[MovieResponse])
def recommend_for_user(request: UserHistoryRequest):
    """
    Takes a list of movie titles the user likes, averages their vectors,
    and finds similar movies.
    """
    check_model()
    outcome = rec_engine.recommend_for_user(request.liked_movies, k=request.k)

    # Handle case where no movies were matched: the engine hands back a message
    # string instead of a result list, which is surfaced as a 404.
    if not isinstance(outcome, str):
        return outcome
    raise HTTPException(status_code=404, detail=outcome)
|
| 123 |
+
|
| 124 |
+
@app.get("/recommend/movie/{title}", response_model=List[MovieResponse])
def recommend_similar_movie(title: str):
    """
    Classic 'Users who liked X also liked Y' (Content-based version).
    """
    check_model()
    outcome = rec_engine.recommend_by_movie(title)

    # A string result is a message from the engine rather than recommendations;
    # it is surfaced as a 404.
    if not isinstance(outcome, str):
        return outcome
    raise HTTPException(status_code=404, detail=outcome)
|
| 136 |
+
|
| 137 |
+
# --- Admin / Operations ---
|
| 138 |
+
|
| 139 |
+
def background_update_task():
    """Run the ingestion job, then reload the recommender from 'models/'."""
    model_dir = 'models/'

    print(" [BACKGROUND] Starting update process...")
    # Step 1: ingestion (Fetch from TMDB + Save to Disk).
    run_weekly_update()

    # Step 2: reload into the live engine so the API sees the new movies
    # without a restart.
    print(" [BACKGROUND] Reloading model into RAM...")
    rec_engine.load(model_dir)
    print(" [BACKGROUND] Update complete. Model reloaded.")
|
| 151 |
+
|
| 152 |
+
@app.post("/admin/trigger-update")
def trigger_update(background_tasks: BackgroundTasks):
    """
    Manually triggers the 'Weekly Update' logic.
    Runs in the background so the API doesn't freeze.
    """
    # NOTE(review): no authentication is visible on this route in this file —
    # confirm it is protected elsewhere before exposing it publicly.
    acknowledgement = {"message": "Update process started in background. Check server logs for progress."}
    background_tasks.add_task(background_update_task)
    return acknowledgement
|
| 160 |
+
|
| 161 |
+
# --- Entry Point ---
|
| 162 |
+
if __name__ == "__main__":
    # Local debugging entry point only. In production the server is started from
    # the command line (the Dockerfile launches uvicorn on port 7860 instead).
    debug_host = "0.0.0.0"
    debug_port = 8000
    uvicorn.run(app, host=debug_host, port=debug_port)
|
eda/.ipynb_checkpoints/Untitled-checkpoint.ipynb
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [],
|
| 3 |
+
"metadata": {},
|
| 4 |
+
"nbformat": 4,
|
| 5 |
+
"nbformat_minor": 5
|
| 6 |
+
}
|
eda/Untitled.ipynb
ADDED
|
@@ -0,0 +1,1251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"id": "faab86b6-6af3-4b86-8288-534228ce01ea",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [
|
| 9 |
+
{
|
| 10 |
+
"name": "stdout",
|
| 11 |
+
"output_type": "stream",
|
| 12 |
+
"text": [
|
| 13 |
+
"Collecting pandas\n",
|
| 14 |
+
" Using cached pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)\n",
|
| 15 |
+
"Requirement already satisfied: numpy in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (2.3.3)\n",
|
| 16 |
+
"Collecting seaborn\n",
|
| 17 |
+
" Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)\n",
|
| 18 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (from pandas) (2.9.0.post0)\n",
|
| 19 |
+
"Collecting pytz>=2020.1 (from pandas)\n",
|
| 20 |
+
" Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n",
|
| 21 |
+
"Requirement already satisfied: tzdata>=2022.7 in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (from pandas) (2025.2)\n",
|
| 22 |
+
"Collecting matplotlib!=3.6.1,>=3.4 (from seaborn)\n",
|
| 23 |
+
" Downloading matplotlib-3.10.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n",
|
| 24 |
+
"Collecting contourpy>=1.0.1 (from matplotlib!=3.6.1,>=3.4->seaborn)\n",
|
| 25 |
+
" Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n",
|
| 26 |
+
"Collecting cycler>=0.10 (from matplotlib!=3.6.1,>=3.4->seaborn)\n",
|
| 27 |
+
" Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
|
| 28 |
+
"Collecting fonttools>=4.22.0 (from matplotlib!=3.6.1,>=3.4->seaborn)\n",
|
| 29 |
+
" Downloading fonttools-4.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (113 kB)\n",
|
| 30 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m113.2/113.2 kB\u001b[0m \u001b[31m276.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
|
| 31 |
+
"\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib!=3.6.1,>=3.4->seaborn)\n",
|
| 32 |
+
" Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)\n",
|
| 33 |
+
"Requirement already satisfied: packaging>=20.0 in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (25.0)\n",
|
| 34 |
+
"Requirement already satisfied: pillow>=8 in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.3.0)\n",
|
| 35 |
+
"Collecting pyparsing>=3 (from matplotlib!=3.6.1,>=3.4->seaborn)\n",
|
| 36 |
+
" Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)\n",
|
| 37 |
+
"Requirement already satisfied: six>=1.5 in /home/jermaine/Desktop/CineMatch/venv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n",
|
| 38 |
+
"Using cached pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.8 MB)\n",
|
| 39 |
+
"Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)\n",
|
| 40 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.9/294.9 kB\u001b[0m \u001b[31m409.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m1m745.6 kB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
|
| 41 |
+
"\u001b[?25hDownloading matplotlib-3.10.7-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)\n",
|
| 42 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.7/8.7 MB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m[36m0:00:01\u001b[0m0m\n",
|
| 43 |
+
"\u001b[?25hUsing cached pytz-2025.2-py2.py3-none-any.whl (509 kB)\n",
|
| 44 |
+
"Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)\n",
|
| 45 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m355.2/355.2 kB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\n",
|
| 46 |
+
"\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
|
| 47 |
+
"Downloading fonttools-4.61.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)\n",
|
| 48 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
|
| 49 |
+
"\u001b[?25hDownloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)\n",
|
| 50 |
+
"\u001b[2K \u001b[38;2;114;156;31m���━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
|
| 51 |
+
"\u001b[?25hDownloading pyparsing-3.2.5-py3-none-any.whl (113 kB)\n",
|
| 52 |
+
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m113.9/113.9 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m\n",
|
| 53 |
+
"\u001b[?25hInstalling collected packages: pytz, pyparsing, kiwisolver, fonttools, cycler, contourpy, pandas, matplotlib, seaborn\n",
|
| 54 |
+
"Successfully installed contourpy-1.3.3 cycler-0.12.1 fonttools-4.61.0 kiwisolver-1.4.9 matplotlib-3.10.7 pandas-2.3.3 pyparsing-3.2.5 pytz-2025.2 seaborn-0.13.2\n",
|
| 55 |
+
"\n",
|
| 56 |
+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\n",
|
| 57 |
+
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
| 58 |
+
]
|
| 59 |
+
}
|
| 60 |
+
],
|
| 61 |
+
"source": [
|
| 62 |
+
"!pip install pandas numpy seaborn"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"cell_type": "code",
|
| 67 |
+
"execution_count": 7,
|
| 68 |
+
"id": "6c83aa85-43a2-4941-8ec4-510010ed72c1",
|
| 69 |
+
"metadata": {},
|
| 70 |
+
"outputs": [],
|
| 71 |
+
"source": [
|
| 72 |
+
"import pandas as pd\n",
|
| 73 |
+
"import numpy as np\n",
|
| 74 |
+
"import seaborn as sns\n",
|
| 75 |
+
"import warnings\n",
|
| 76 |
+
"warnings.filterwarnings(\"ignore\")"
|
| 77 |
+
]
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 8,
|
| 82 |
+
"id": "6c692a3d-e17e-458b-9cfd-9edc2b9c76e5",
|
| 83 |
+
"metadata": {},
|
| 84 |
+
"outputs": [],
|
| 85 |
+
"source": [
|
| 86 |
+
"df = pd.read_csv(\"~/Desktop/CineMatch/data/movies_metadata.csv\")"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"cell_type": "code",
|
| 91 |
+
"execution_count": 9,
|
| 92 |
+
"id": "761e4049-76e8-4637-b34a-f922a8237836",
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [
|
| 95 |
+
{
|
| 96 |
+
"data": {
|
| 97 |
+
"text/html": [
|
| 98 |
+
"<div>\n",
|
| 99 |
+
"<style scoped>\n",
|
| 100 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 101 |
+
" vertical-align: middle;\n",
|
| 102 |
+
" }\n",
|
| 103 |
+
"\n",
|
| 104 |
+
" .dataframe tbody tr th {\n",
|
| 105 |
+
" vertical-align: top;\n",
|
| 106 |
+
" }\n",
|
| 107 |
+
"\n",
|
| 108 |
+
" .dataframe thead th {\n",
|
| 109 |
+
" text-align: right;\n",
|
| 110 |
+
" }\n",
|
| 111 |
+
"</style>\n",
|
| 112 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 113 |
+
" <thead>\n",
|
| 114 |
+
" <tr style=\"text-align: right;\">\n",
|
| 115 |
+
" <th></th>\n",
|
| 116 |
+
" <th>adult</th>\n",
|
| 117 |
+
" <th>belongs_to_collection</th>\n",
|
| 118 |
+
" <th>budget</th>\n",
|
| 119 |
+
" <th>genres</th>\n",
|
| 120 |
+
" <th>homepage</th>\n",
|
| 121 |
+
" <th>id</th>\n",
|
| 122 |
+
" <th>imdb_id</th>\n",
|
| 123 |
+
" <th>original_language</th>\n",
|
| 124 |
+
" <th>original_title</th>\n",
|
| 125 |
+
" <th>overview</th>\n",
|
| 126 |
+
" <th>...</th>\n",
|
| 127 |
+
" <th>release_date</th>\n",
|
| 128 |
+
" <th>revenue</th>\n",
|
| 129 |
+
" <th>runtime</th>\n",
|
| 130 |
+
" <th>spoken_languages</th>\n",
|
| 131 |
+
" <th>status</th>\n",
|
| 132 |
+
" <th>tagline</th>\n",
|
| 133 |
+
" <th>title</th>\n",
|
| 134 |
+
" <th>video</th>\n",
|
| 135 |
+
" <th>vote_average</th>\n",
|
| 136 |
+
" <th>vote_count</th>\n",
|
| 137 |
+
" </tr>\n",
|
| 138 |
+
" </thead>\n",
|
| 139 |
+
" <tbody>\n",
|
| 140 |
+
" <tr>\n",
|
| 141 |
+
" <th>0</th>\n",
|
| 142 |
+
" <td>False</td>\n",
|
| 143 |
+
" <td>{'id': 10194, 'name': 'Toy Story Collection', ...</td>\n",
|
| 144 |
+
" <td>30000000</td>\n",
|
| 145 |
+
" <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
|
| 146 |
+
" <td>http://toystory.disney.com/toy-story</td>\n",
|
| 147 |
+
" <td>862</td>\n",
|
| 148 |
+
" <td>tt0114709</td>\n",
|
| 149 |
+
" <td>en</td>\n",
|
| 150 |
+
" <td>Toy Story</td>\n",
|
| 151 |
+
" <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
|
| 152 |
+
" <td>...</td>\n",
|
| 153 |
+
" <td>1995-10-30</td>\n",
|
| 154 |
+
" <td>373554033.0</td>\n",
|
| 155 |
+
" <td>81.0</td>\n",
|
| 156 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 157 |
+
" <td>Released</td>\n",
|
| 158 |
+
" <td>NaN</td>\n",
|
| 159 |
+
" <td>Toy Story</td>\n",
|
| 160 |
+
" <td>False</td>\n",
|
| 161 |
+
" <td>7.7</td>\n",
|
| 162 |
+
" <td>5415.0</td>\n",
|
| 163 |
+
" </tr>\n",
|
| 164 |
+
" <tr>\n",
|
| 165 |
+
" <th>1</th>\n",
|
| 166 |
+
" <td>False</td>\n",
|
| 167 |
+
" <td>NaN</td>\n",
|
| 168 |
+
" <td>65000000</td>\n",
|
| 169 |
+
" <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
|
| 170 |
+
" <td>NaN</td>\n",
|
| 171 |
+
" <td>8844</td>\n",
|
| 172 |
+
" <td>tt0113497</td>\n",
|
| 173 |
+
" <td>en</td>\n",
|
| 174 |
+
" <td>Jumanji</td>\n",
|
| 175 |
+
" <td>When siblings Judy and Peter discover an encha...</td>\n",
|
| 176 |
+
" <td>...</td>\n",
|
| 177 |
+
" <td>1995-12-15</td>\n",
|
| 178 |
+
" <td>262797249.0</td>\n",
|
| 179 |
+
" <td>104.0</td>\n",
|
| 180 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}, {'iso...</td>\n",
|
| 181 |
+
" <td>Released</td>\n",
|
| 182 |
+
" <td>Roll the dice and unleash the excitement!</td>\n",
|
| 183 |
+
" <td>Jumanji</td>\n",
|
| 184 |
+
" <td>False</td>\n",
|
| 185 |
+
" <td>6.9</td>\n",
|
| 186 |
+
" <td>2413.0</td>\n",
|
| 187 |
+
" </tr>\n",
|
| 188 |
+
" <tr>\n",
|
| 189 |
+
" <th>2</th>\n",
|
| 190 |
+
" <td>False</td>\n",
|
| 191 |
+
" <td>{'id': 119050, 'name': 'Grumpy Old Men Collect...</td>\n",
|
| 192 |
+
" <td>0</td>\n",
|
| 193 |
+
" <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
|
| 194 |
+
" <td>NaN</td>\n",
|
| 195 |
+
" <td>15602</td>\n",
|
| 196 |
+
" <td>tt0113228</td>\n",
|
| 197 |
+
" <td>en</td>\n",
|
| 198 |
+
" <td>Grumpier Old Men</td>\n",
|
| 199 |
+
" <td>A family wedding reignites the ancient feud be...</td>\n",
|
| 200 |
+
" <td>...</td>\n",
|
| 201 |
+
" <td>1995-12-22</td>\n",
|
| 202 |
+
" <td>0.0</td>\n",
|
| 203 |
+
" <td>101.0</td>\n",
|
| 204 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 205 |
+
" <td>Released</td>\n",
|
| 206 |
+
" <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
|
| 207 |
+
" <td>Grumpier Old Men</td>\n",
|
| 208 |
+
" <td>False</td>\n",
|
| 209 |
+
" <td>6.5</td>\n",
|
| 210 |
+
" <td>92.0</td>\n",
|
| 211 |
+
" </tr>\n",
|
| 212 |
+
" <tr>\n",
|
| 213 |
+
" <th>3</th>\n",
|
| 214 |
+
" <td>False</td>\n",
|
| 215 |
+
" <td>NaN</td>\n",
|
| 216 |
+
" <td>16000000</td>\n",
|
| 217 |
+
" <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
|
| 218 |
+
" <td>NaN</td>\n",
|
| 219 |
+
" <td>31357</td>\n",
|
| 220 |
+
" <td>tt0114885</td>\n",
|
| 221 |
+
" <td>en</td>\n",
|
| 222 |
+
" <td>Waiting to Exhale</td>\n",
|
| 223 |
+
" <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
|
| 224 |
+
" <td>...</td>\n",
|
| 225 |
+
" <td>1995-12-22</td>\n",
|
| 226 |
+
" <td>81452156.0</td>\n",
|
| 227 |
+
" <td>127.0</td>\n",
|
| 228 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 229 |
+
" <td>Released</td>\n",
|
| 230 |
+
" <td>Friends are the people who let you be yourself...</td>\n",
|
| 231 |
+
" <td>Waiting to Exhale</td>\n",
|
| 232 |
+
" <td>False</td>\n",
|
| 233 |
+
" <td>6.1</td>\n",
|
| 234 |
+
" <td>34.0</td>\n",
|
| 235 |
+
" </tr>\n",
|
| 236 |
+
" <tr>\n",
|
| 237 |
+
" <th>4</th>\n",
|
| 238 |
+
" <td>False</td>\n",
|
| 239 |
+
" <td>{'id': 96871, 'name': 'Father of the Bride Col...</td>\n",
|
| 240 |
+
" <td>0</td>\n",
|
| 241 |
+
" <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
|
| 242 |
+
" <td>NaN</td>\n",
|
| 243 |
+
" <td>11862</td>\n",
|
| 244 |
+
" <td>tt0113041</td>\n",
|
| 245 |
+
" <td>en</td>\n",
|
| 246 |
+
" <td>Father of the Bride Part II</td>\n",
|
| 247 |
+
" <td>Just when George Banks has recovered from his ...</td>\n",
|
| 248 |
+
" <td>...</td>\n",
|
| 249 |
+
" <td>1995-02-10</td>\n",
|
| 250 |
+
" <td>76578911.0</td>\n",
|
| 251 |
+
" <td>106.0</td>\n",
|
| 252 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 253 |
+
" <td>Released</td>\n",
|
| 254 |
+
" <td>Just When His World Is Back To Normal... He's ...</td>\n",
|
| 255 |
+
" <td>Father of the Bride Part II</td>\n",
|
| 256 |
+
" <td>False</td>\n",
|
| 257 |
+
" <td>5.7</td>\n",
|
| 258 |
+
" <td>173.0</td>\n",
|
| 259 |
+
" </tr>\n",
|
| 260 |
+
" <tr>\n",
|
| 261 |
+
" <th>...</th>\n",
|
| 262 |
+
" <td>...</td>\n",
|
| 263 |
+
" <td>...</td>\n",
|
| 264 |
+
" <td>...</td>\n",
|
| 265 |
+
" <td>...</td>\n",
|
| 266 |
+
" <td>...</td>\n",
|
| 267 |
+
" <td>...</td>\n",
|
| 268 |
+
" <td>...</td>\n",
|
| 269 |
+
" <td>...</td>\n",
|
| 270 |
+
" <td>...</td>\n",
|
| 271 |
+
" <td>...</td>\n",
|
| 272 |
+
" <td>...</td>\n",
|
| 273 |
+
" <td>...</td>\n",
|
| 274 |
+
" <td>...</td>\n",
|
| 275 |
+
" <td>...</td>\n",
|
| 276 |
+
" <td>...</td>\n",
|
| 277 |
+
" <td>...</td>\n",
|
| 278 |
+
" <td>...</td>\n",
|
| 279 |
+
" <td>...</td>\n",
|
| 280 |
+
" <td>...</td>\n",
|
| 281 |
+
" <td>...</td>\n",
|
| 282 |
+
" <td>...</td>\n",
|
| 283 |
+
" </tr>\n",
|
| 284 |
+
" <tr>\n",
|
| 285 |
+
" <th>45461</th>\n",
|
| 286 |
+
" <td>False</td>\n",
|
| 287 |
+
" <td>NaN</td>\n",
|
| 288 |
+
" <td>0</td>\n",
|
| 289 |
+
" <td>[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...</td>\n",
|
| 290 |
+
" <td>http://www.imdb.com/title/tt6209470/</td>\n",
|
| 291 |
+
" <td>439050</td>\n",
|
| 292 |
+
" <td>tt6209470</td>\n",
|
| 293 |
+
" <td>fa</td>\n",
|
| 294 |
+
" <td>رگ خواب</td>\n",
|
| 295 |
+
" <td>Rising and falling between a man and woman.</td>\n",
|
| 296 |
+
" <td>...</td>\n",
|
| 297 |
+
" <td>NaN</td>\n",
|
| 298 |
+
" <td>0.0</td>\n",
|
| 299 |
+
" <td>90.0</td>\n",
|
| 300 |
+
" <td>[{'iso_639_1': 'fa', 'name': 'فارسی'}]</td>\n",
|
| 301 |
+
" <td>Released</td>\n",
|
| 302 |
+
" <td>Rising and falling between a man and woman</td>\n",
|
| 303 |
+
" <td>Subdue</td>\n",
|
| 304 |
+
" <td>False</td>\n",
|
| 305 |
+
" <td>4.0</td>\n",
|
| 306 |
+
" <td>1.0</td>\n",
|
| 307 |
+
" </tr>\n",
|
| 308 |
+
" <tr>\n",
|
| 309 |
+
" <th>45462</th>\n",
|
| 310 |
+
" <td>False</td>\n",
|
| 311 |
+
" <td>NaN</td>\n",
|
| 312 |
+
" <td>0</td>\n",
|
| 313 |
+
" <td>[{'id': 18, 'name': 'Drama'}]</td>\n",
|
| 314 |
+
" <td>NaN</td>\n",
|
| 315 |
+
" <td>111109</td>\n",
|
| 316 |
+
" <td>tt2028550</td>\n",
|
| 317 |
+
" <td>tl</td>\n",
|
| 318 |
+
" <td>Siglo ng Pagluluwal</td>\n",
|
| 319 |
+
" <td>An artist struggles to finish his work while a...</td>\n",
|
| 320 |
+
" <td>...</td>\n",
|
| 321 |
+
" <td>2011-11-17</td>\n",
|
| 322 |
+
" <td>0.0</td>\n",
|
| 323 |
+
" <td>360.0</td>\n",
|
| 324 |
+
" <td>[{'iso_639_1': 'tl', 'name': ''}]</td>\n",
|
| 325 |
+
" <td>Released</td>\n",
|
| 326 |
+
" <td>NaN</td>\n",
|
| 327 |
+
" <td>Century of Birthing</td>\n",
|
| 328 |
+
" <td>False</td>\n",
|
| 329 |
+
" <td>9.0</td>\n",
|
| 330 |
+
" <td>3.0</td>\n",
|
| 331 |
+
" </tr>\n",
|
| 332 |
+
" <tr>\n",
|
| 333 |
+
" <th>45463</th>\n",
|
| 334 |
+
" <td>False</td>\n",
|
| 335 |
+
" <td>NaN</td>\n",
|
| 336 |
+
" <td>0</td>\n",
|
| 337 |
+
" <td>[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...</td>\n",
|
| 338 |
+
" <td>NaN</td>\n",
|
| 339 |
+
" <td>67758</td>\n",
|
| 340 |
+
" <td>tt0303758</td>\n",
|
| 341 |
+
" <td>en</td>\n",
|
| 342 |
+
" <td>Betrayal</td>\n",
|
| 343 |
+
" <td>When one of her hits goes wrong, a professiona...</td>\n",
|
| 344 |
+
" <td>...</td>\n",
|
| 345 |
+
" <td>2003-08-01</td>\n",
|
| 346 |
+
" <td>0.0</td>\n",
|
| 347 |
+
" <td>90.0</td>\n",
|
| 348 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 349 |
+
" <td>Released</td>\n",
|
| 350 |
+
" <td>A deadly game of wits.</td>\n",
|
| 351 |
+
" <td>Betrayal</td>\n",
|
| 352 |
+
" <td>False</td>\n",
|
| 353 |
+
" <td>3.8</td>\n",
|
| 354 |
+
" <td>6.0</td>\n",
|
| 355 |
+
" </tr>\n",
|
| 356 |
+
" <tr>\n",
|
| 357 |
+
" <th>45464</th>\n",
|
| 358 |
+
" <td>False</td>\n",
|
| 359 |
+
" <td>NaN</td>\n",
|
| 360 |
+
" <td>0</td>\n",
|
| 361 |
+
" <td>[]</td>\n",
|
| 362 |
+
" <td>NaN</td>\n",
|
| 363 |
+
" <td>227506</td>\n",
|
| 364 |
+
" <td>tt0008536</td>\n",
|
| 365 |
+
" <td>en</td>\n",
|
| 366 |
+
" <td>Satana likuyushchiy</td>\n",
|
| 367 |
+
" <td>In a small town live two brothers, one a minis...</td>\n",
|
| 368 |
+
" <td>...</td>\n",
|
| 369 |
+
" <td>1917-10-21</td>\n",
|
| 370 |
+
" <td>0.0</td>\n",
|
| 371 |
+
" <td>87.0</td>\n",
|
| 372 |
+
" <td>[]</td>\n",
|
| 373 |
+
" <td>Released</td>\n",
|
| 374 |
+
" <td>NaN</td>\n",
|
| 375 |
+
" <td>Satan Triumphant</td>\n",
|
| 376 |
+
" <td>False</td>\n",
|
| 377 |
+
" <td>0.0</td>\n",
|
| 378 |
+
" <td>0.0</td>\n",
|
| 379 |
+
" </tr>\n",
|
| 380 |
+
" <tr>\n",
|
| 381 |
+
" <th>45465</th>\n",
|
| 382 |
+
" <td>False</td>\n",
|
| 383 |
+
" <td>NaN</td>\n",
|
| 384 |
+
" <td>0</td>\n",
|
| 385 |
+
" <td>[]</td>\n",
|
| 386 |
+
" <td>NaN</td>\n",
|
| 387 |
+
" <td>461257</td>\n",
|
| 388 |
+
" <td>tt6980792</td>\n",
|
| 389 |
+
" <td>en</td>\n",
|
| 390 |
+
" <td>Queerama</td>\n",
|
| 391 |
+
" <td>50 years after decriminalisation of homosexual...</td>\n",
|
| 392 |
+
" <td>...</td>\n",
|
| 393 |
+
" <td>2017-06-09</td>\n",
|
| 394 |
+
" <td>0.0</td>\n",
|
| 395 |
+
" <td>75.0</td>\n",
|
| 396 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 397 |
+
" <td>Released</td>\n",
|
| 398 |
+
" <td>NaN</td>\n",
|
| 399 |
+
" <td>Queerama</td>\n",
|
| 400 |
+
" <td>False</td>\n",
|
| 401 |
+
" <td>0.0</td>\n",
|
| 402 |
+
" <td>0.0</td>\n",
|
| 403 |
+
" </tr>\n",
|
| 404 |
+
" </tbody>\n",
|
| 405 |
+
"</table>\n",
|
| 406 |
+
"<p>45466 rows × 24 columns</p>\n",
|
| 407 |
+
"</div>"
|
| 408 |
+
],
|
| 409 |
+
"text/plain": [
|
| 410 |
+
" adult belongs_to_collection budget \\\n",
|
| 411 |
+
"0 False {'id': 10194, 'name': 'Toy Story Collection', ... 30000000 \n",
|
| 412 |
+
"1 False NaN 65000000 \n",
|
| 413 |
+
"2 False {'id': 119050, 'name': 'Grumpy Old Men Collect... 0 \n",
|
| 414 |
+
"3 False NaN 16000000 \n",
|
| 415 |
+
"4 False {'id': 96871, 'name': 'Father of the Bride Col... 0 \n",
|
| 416 |
+
"... ... ... ... \n",
|
| 417 |
+
"45461 False NaN 0 \n",
|
| 418 |
+
"45462 False NaN 0 \n",
|
| 419 |
+
"45463 False NaN 0 \n",
|
| 420 |
+
"45464 False NaN 0 \n",
|
| 421 |
+
"45465 False NaN 0 \n",
|
| 422 |
+
"\n",
|
| 423 |
+
" genres \\\n",
|
| 424 |
+
"0 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
|
| 425 |
+
"1 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
|
| 426 |
+
"2 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
|
| 427 |
+
"3 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
|
| 428 |
+
"4 [{'id': 35, 'name': 'Comedy'}] \n",
|
| 429 |
+
"... ... \n",
|
| 430 |
+
"45461 [{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n... \n",
|
| 431 |
+
"45462 [{'id': 18, 'name': 'Drama'}] \n",
|
| 432 |
+
"45463 [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam... \n",
|
| 433 |
+
"45464 [] \n",
|
| 434 |
+
"45465 [] \n",
|
| 435 |
+
"\n",
|
| 436 |
+
" homepage id imdb_id \\\n",
|
| 437 |
+
"0 http://toystory.disney.com/toy-story 862 tt0114709 \n",
|
| 438 |
+
"1 NaN 8844 tt0113497 \n",
|
| 439 |
+
"2 NaN 15602 tt0113228 \n",
|
| 440 |
+
"3 NaN 31357 tt0114885 \n",
|
| 441 |
+
"4 NaN 11862 tt0113041 \n",
|
| 442 |
+
"... ... ... ... \n",
|
| 443 |
+
"45461 http://www.imdb.com/title/tt6209470/ 439050 tt6209470 \n",
|
| 444 |
+
"45462 NaN 111109 tt2028550 \n",
|
| 445 |
+
"45463 NaN 67758 tt0303758 \n",
|
| 446 |
+
"45464 NaN 227506 tt0008536 \n",
|
| 447 |
+
"45465 NaN 461257 tt6980792 \n",
|
| 448 |
+
"\n",
|
| 449 |
+
" original_language original_title \\\n",
|
| 450 |
+
"0 en Toy Story \n",
|
| 451 |
+
"1 en Jumanji \n",
|
| 452 |
+
"2 en Grumpier Old Men \n",
|
| 453 |
+
"3 en Waiting to Exhale \n",
|
| 454 |
+
"4 en Father of the Bride Part II \n",
|
| 455 |
+
"... ... ... \n",
|
| 456 |
+
"45461 fa رگ خواب \n",
|
| 457 |
+
"45462 tl Siglo ng Pagluluwal \n",
|
| 458 |
+
"45463 en Betrayal \n",
|
| 459 |
+
"45464 en Satana likuyushchiy \n",
|
| 460 |
+
"45465 en Queerama \n",
|
| 461 |
+
"\n",
|
| 462 |
+
" overview ... release_date \\\n",
|
| 463 |
+
"0 Led by Woody, Andy's toys live happily in his ... ... 1995-10-30 \n",
|
| 464 |
+
"1 When siblings Judy and Peter discover an encha... ... 1995-12-15 \n",
|
| 465 |
+
"2 A family wedding reignites the ancient feud be... ... 1995-12-22 \n",
|
| 466 |
+
"3 Cheated on, mistreated and stepped on, the wom... ... 1995-12-22 \n",
|
| 467 |
+
"4 Just when George Banks has recovered from his ... ... 1995-02-10 \n",
|
| 468 |
+
"... ... ... ... \n",
|
| 469 |
+
"45461 Rising and falling between a man and woman. ... NaN \n",
|
| 470 |
+
"45462 An artist struggles to finish his work while a... ... 2011-11-17 \n",
|
| 471 |
+
"45463 When one of her hits goes wrong, a professiona... ... 2003-08-01 \n",
|
| 472 |
+
"45464 In a small town live two brothers, one a minis... ... 1917-10-21 \n",
|
| 473 |
+
"45465 50 years after decriminalisation of homosexual... ... 2017-06-09 \n",
|
| 474 |
+
"\n",
|
| 475 |
+
" revenue runtime spoken_languages \\\n",
|
| 476 |
+
"0 373554033.0 81.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 477 |
+
"1 262797249.0 104.0 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n",
|
| 478 |
+
"2 0.0 101.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 479 |
+
"3 81452156.0 127.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 480 |
+
"4 76578911.0 106.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 481 |
+
"... ... ... ... \n",
|
| 482 |
+
"45461 0.0 90.0 [{'iso_639_1': 'fa', 'name': 'فارسی'}] \n",
|
| 483 |
+
"45462 0.0 360.0 [{'iso_639_1': 'tl', 'name': ''}] \n",
|
| 484 |
+
"45463 0.0 90.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 485 |
+
"45464 0.0 87.0 [] \n",
|
| 486 |
+
"45465 0.0 75.0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 487 |
+
"\n",
|
| 488 |
+
" status tagline \\\n",
|
| 489 |
+
"0 Released NaN \n",
|
| 490 |
+
"1 Released Roll the dice and unleash the excitement! \n",
|
| 491 |
+
"2 Released Still Yelling. Still Fighting. Still Ready for... \n",
|
| 492 |
+
"3 Released Friends are the people who let you be yourself... \n",
|
| 493 |
+
"4 Released Just When His World Is Back To Normal... He's ... \n",
|
| 494 |
+
"... ... ... \n",
|
| 495 |
+
"45461 Released Rising and falling between a man and woman \n",
|
| 496 |
+
"45462 Released NaN \n",
|
| 497 |
+
"45463 Released A deadly game of wits. \n",
|
| 498 |
+
"45464 Released NaN \n",
|
| 499 |
+
"45465 Released NaN \n",
|
| 500 |
+
"\n",
|
| 501 |
+
" title video vote_average vote_count \n",
|
| 502 |
+
"0 Toy Story False 7.7 5415.0 \n",
|
| 503 |
+
"1 Jumanji False 6.9 2413.0 \n",
|
| 504 |
+
"2 Grumpier Old Men False 6.5 92.0 \n",
|
| 505 |
+
"3 Waiting to Exhale False 6.1 34.0 \n",
|
| 506 |
+
"4 Father of the Bride Part II False 5.7 173.0 \n",
|
| 507 |
+
"... ... ... ... ... \n",
|
| 508 |
+
"45461 Subdue False 4.0 1.0 \n",
|
| 509 |
+
"45462 Century of Birthing False 9.0 3.0 \n",
|
| 510 |
+
"45463 Betrayal False 3.8 6.0 \n",
|
| 511 |
+
"45464 Satan Triumphant False 0.0 0.0 \n",
|
| 512 |
+
"45465 Queerama False 0.0 0.0 \n",
|
| 513 |
+
"\n",
|
| 514 |
+
"[45466 rows x 24 columns]"
|
| 515 |
+
]
|
| 516 |
+
},
|
| 517 |
+
"execution_count": 9,
|
| 518 |
+
"metadata": {},
|
| 519 |
+
"output_type": "execute_result"
|
| 520 |
+
}
|
| 521 |
+
],
|
| 522 |
+
"source": [
|
| 523 |
+
"df"
|
| 524 |
+
]
|
| 525 |
+
},
|
| 526 |
+
{
|
| 527 |
+
"cell_type": "code",
|
| 528 |
+
"execution_count": 10,
|
| 529 |
+
"id": "010b9a7e-2075-4119-9b3e-6a79970245c0",
|
| 530 |
+
"metadata": {},
|
| 531 |
+
"outputs": [
|
| 532 |
+
{
|
| 533 |
+
"name": "stdout",
|
| 534 |
+
"output_type": "stream",
|
| 535 |
+
"text": [
|
| 536 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
| 537 |
+
"RangeIndex: 45466 entries, 0 to 45465\n",
|
| 538 |
+
"Data columns (total 24 columns):\n",
|
| 539 |
+
" # Column Non-Null Count Dtype \n",
|
| 540 |
+
"--- ------ -------------- ----- \n",
|
| 541 |
+
" 0 adult 45466 non-null object \n",
|
| 542 |
+
" 1 belongs_to_collection 4494 non-null object \n",
|
| 543 |
+
" 2 budget 45466 non-null object \n",
|
| 544 |
+
" 3 genres 45466 non-null object \n",
|
| 545 |
+
" 4 homepage 7782 non-null object \n",
|
| 546 |
+
" 5 id 45466 non-null object \n",
|
| 547 |
+
" 6 imdb_id 45449 non-null object \n",
|
| 548 |
+
" 7 original_language 45455 non-null object \n",
|
| 549 |
+
" 8 original_title 45466 non-null object \n",
|
| 550 |
+
" 9 overview 44512 non-null object \n",
|
| 551 |
+
" 10 popularity 45461 non-null object \n",
|
| 552 |
+
" 11 poster_path 45080 non-null object \n",
|
| 553 |
+
" 12 production_companies 45463 non-null object \n",
|
| 554 |
+
" 13 production_countries 45463 non-null object \n",
|
| 555 |
+
" 14 release_date 45379 non-null object \n",
|
| 556 |
+
" 15 revenue 45460 non-null float64\n",
|
| 557 |
+
" 16 runtime 45203 non-null float64\n",
|
| 558 |
+
" 17 spoken_languages 45460 non-null object \n",
|
| 559 |
+
" 18 status 45379 non-null object \n",
|
| 560 |
+
" 19 tagline 20412 non-null object \n",
|
| 561 |
+
" 20 title 45460 non-null object \n",
|
| 562 |
+
" 21 video 45460 non-null object \n",
|
| 563 |
+
" 22 vote_average 45460 non-null float64\n",
|
| 564 |
+
" 23 vote_count 45460 non-null float64\n",
|
| 565 |
+
"dtypes: float64(4), object(20)\n",
|
| 566 |
+
"memory usage: 8.3+ MB\n"
|
| 567 |
+
]
|
| 568 |
+
}
|
| 569 |
+
],
|
| 570 |
+
"source": [
|
| 571 |
+
"df.info()"
|
| 572 |
+
]
|
| 573 |
+
},
|
| 574 |
+
{
|
| 575 |
+
"cell_type": "code",
|
| 576 |
+
"execution_count": 11,
|
| 577 |
+
"id": "e17d1cb7-6446-40c7-acf8-05934b5b66c0",
|
| 578 |
+
"metadata": {},
|
| 579 |
+
"outputs": [
|
| 580 |
+
{
|
| 581 |
+
"data": {
|
| 582 |
+
"text/plain": [
|
| 583 |
+
"adult False\n",
|
| 584 |
+
"belongs_to_collection {'id': 10194, 'name': 'Toy Story Collection', ...\n",
|
| 585 |
+
"budget 30000000\n",
|
| 586 |
+
"genres [{'id': 16, 'name': 'Animation'}, {'id': 35, '...\n",
|
| 587 |
+
"homepage http://toystory.disney.com/toy-story\n",
|
| 588 |
+
"id 862\n",
|
| 589 |
+
"imdb_id tt0114709\n",
|
| 590 |
+
"original_language en\n",
|
| 591 |
+
"original_title Toy Story\n",
|
| 592 |
+
"overview Led by Woody, Andy's toys live happily in his ...\n",
|
| 593 |
+
"popularity 21.946943\n",
|
| 594 |
+
"poster_path /rhIRbceoE9lR4veEXuwCC2wARtG.jpg\n",
|
| 595 |
+
"production_companies [{'name': 'Pixar Animation Studios', 'id': 3}]\n",
|
| 596 |
+
"production_countries [{'iso_3166_1': 'US', 'name': 'United States o...\n",
|
| 597 |
+
"release_date 1995-10-30\n",
|
| 598 |
+
"revenue 373554033.0\n",
|
| 599 |
+
"runtime 81.0\n",
|
| 600 |
+
"spoken_languages [{'iso_639_1': 'en', 'name': 'English'}]\n",
|
| 601 |
+
"status Released\n",
|
| 602 |
+
"tagline NaN\n",
|
| 603 |
+
"title Toy Story\n",
|
| 604 |
+
"video False\n",
|
| 605 |
+
"vote_average 7.7\n",
|
| 606 |
+
"vote_count 5415.0\n",
|
| 607 |
+
"Name: 0, dtype: object"
|
| 608 |
+
]
|
| 609 |
+
},
|
| 610 |
+
"execution_count": 11,
|
| 611 |
+
"metadata": {},
|
| 612 |
+
"output_type": "execute_result"
|
| 613 |
+
}
|
| 614 |
+
],
|
| 615 |
+
"source": [
|
| 616 |
+
"df.iloc[0]"
|
| 617 |
+
]
|
| 618 |
+
},
|
| 619 |
+
{
|
| 620 |
+
"cell_type": "code",
|
| 621 |
+
"execution_count": 14,
|
| 622 |
+
"id": "0dc44094-31b5-4870-81db-16d3bc96e59e",
|
| 623 |
+
"metadata": {},
|
| 624 |
+
"outputs": [],
|
| 625 |
+
"source": [
|
| 626 |
+
"df.drop(['vote_count','vote_average','video'], axis=1,inplace=True)\n"
|
| 627 |
+
]
|
| 628 |
+
},
|
| 629 |
+
{
|
| 630 |
+
"cell_type": "code",
|
| 631 |
+
"execution_count": 18,
|
| 632 |
+
"id": "12d1ffb9-5cf2-4f7d-8a92-85c8471233bd",
|
| 633 |
+
"metadata": {},
|
| 634 |
+
"outputs": [
|
| 635 |
+
{
|
| 636 |
+
"data": {
|
| 637 |
+
"text/plain": [
|
| 638 |
+
"\"{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}\""
|
| 639 |
+
]
|
| 640 |
+
},
|
| 641 |
+
"execution_count": 18,
|
| 642 |
+
"metadata": {},
|
| 643 |
+
"output_type": "execute_result"
|
| 644 |
+
}
|
| 645 |
+
],
|
| 646 |
+
"source": [
|
| 647 |
+
"df['belongs_to_collection'][2]"
|
| 648 |
+
]
|
| 649 |
+
},
|
| 650 |
+
{
|
| 651 |
+
"cell_type": "code",
|
| 652 |
+
"execution_count": 20,
|
| 653 |
+
"id": "33cce957-eed8-429a-b9f7-1ce83e4ec49c",
|
| 654 |
+
"metadata": {},
|
| 655 |
+
"outputs": [],
|
| 656 |
+
"source": [
|
| 657 |
+
"df.drop('belongs_to_collection',axis=1,inplace=True)"
|
| 658 |
+
]
|
| 659 |
+
},
|
| 660 |
+
{
|
| 661 |
+
"cell_type": "code",
|
| 662 |
+
"execution_count": 21,
|
| 663 |
+
"id": "ced88473-c105-4e0a-808b-f9903c8b8643",
|
| 664 |
+
"metadata": {},
|
| 665 |
+
"outputs": [],
|
| 666 |
+
"source": [
|
| 667 |
+
"df.drop(['homepage','status',],axis=1,inplace=True)"
|
| 668 |
+
]
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"cell_type": "code",
|
| 672 |
+
"execution_count": 22,
|
| 673 |
+
"id": "519facbd-91c9-4784-aa32-44a0d9edb24f",
|
| 674 |
+
"metadata": {},
|
| 675 |
+
"outputs": [
|
| 676 |
+
{
|
| 677 |
+
"data": {
|
| 678 |
+
"text/html": [
|
| 679 |
+
"<div>\n",
|
| 680 |
+
"<style scoped>\n",
|
| 681 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 682 |
+
" vertical-align: middle;\n",
|
| 683 |
+
" }\n",
|
| 684 |
+
"\n",
|
| 685 |
+
" .dataframe tbody tr th {\n",
|
| 686 |
+
" vertical-align: top;\n",
|
| 687 |
+
" }\n",
|
| 688 |
+
"\n",
|
| 689 |
+
" .dataframe thead th {\n",
|
| 690 |
+
" text-align: right;\n",
|
| 691 |
+
" }\n",
|
| 692 |
+
"</style>\n",
|
| 693 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 694 |
+
" <thead>\n",
|
| 695 |
+
" <tr style=\"text-align: right;\">\n",
|
| 696 |
+
" <th></th>\n",
|
| 697 |
+
" <th>adult</th>\n",
|
| 698 |
+
" <th>budget</th>\n",
|
| 699 |
+
" <th>genres</th>\n",
|
| 700 |
+
" <th>id</th>\n",
|
| 701 |
+
" <th>imdb_id</th>\n",
|
| 702 |
+
" <th>original_language</th>\n",
|
| 703 |
+
" <th>original_title</th>\n",
|
| 704 |
+
" <th>overview</th>\n",
|
| 705 |
+
" <th>popularity</th>\n",
|
| 706 |
+
" <th>poster_path</th>\n",
|
| 707 |
+
" <th>production_companies</th>\n",
|
| 708 |
+
" <th>production_countries</th>\n",
|
| 709 |
+
" <th>release_date</th>\n",
|
| 710 |
+
" <th>revenue</th>\n",
|
| 711 |
+
" <th>runtime</th>\n",
|
| 712 |
+
" <th>spoken_languages</th>\n",
|
| 713 |
+
" <th>tagline</th>\n",
|
| 714 |
+
" <th>title</th>\n",
|
| 715 |
+
" </tr>\n",
|
| 716 |
+
" </thead>\n",
|
| 717 |
+
" <tbody>\n",
|
| 718 |
+
" <tr>\n",
|
| 719 |
+
" <th>0</th>\n",
|
| 720 |
+
" <td>False</td>\n",
|
| 721 |
+
" <td>30000000</td>\n",
|
| 722 |
+
" <td>[{'id': 16, 'name': 'Animation'}, {'id': 35, '...</td>\n",
|
| 723 |
+
" <td>862</td>\n",
|
| 724 |
+
" <td>tt0114709</td>\n",
|
| 725 |
+
" <td>en</td>\n",
|
| 726 |
+
" <td>Toy Story</td>\n",
|
| 727 |
+
" <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
|
| 728 |
+
" <td>21.946943</td>\n",
|
| 729 |
+
" <td>/rhIRbceoE9lR4veEXuwCC2wARtG.jpg</td>\n",
|
| 730 |
+
" <td>[{'name': 'Pixar Animation Studios', 'id': 3}]</td>\n",
|
| 731 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 732 |
+
" <td>1995-10-30</td>\n",
|
| 733 |
+
" <td>373554033.0</td>\n",
|
| 734 |
+
" <td>81.0</td>\n",
|
| 735 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 736 |
+
" <td>NaN</td>\n",
|
| 737 |
+
" <td>Toy Story</td>\n",
|
| 738 |
+
" </tr>\n",
|
| 739 |
+
" <tr>\n",
|
| 740 |
+
" <th>1</th>\n",
|
| 741 |
+
" <td>False</td>\n",
|
| 742 |
+
" <td>65000000</td>\n",
|
| 743 |
+
" <td>[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...</td>\n",
|
| 744 |
+
" <td>8844</td>\n",
|
| 745 |
+
" <td>tt0113497</td>\n",
|
| 746 |
+
" <td>en</td>\n",
|
| 747 |
+
" <td>Jumanji</td>\n",
|
| 748 |
+
" <td>When siblings Judy and Peter discover an encha...</td>\n",
|
| 749 |
+
" <td>17.015539</td>\n",
|
| 750 |
+
" <td>/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg</td>\n",
|
| 751 |
+
" <td>[{'name': 'TriStar Pictures', 'id': 559}, {'na...</td>\n",
|
| 752 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 753 |
+
" <td>1995-12-15</td>\n",
|
| 754 |
+
" <td>262797249.0</td>\n",
|
| 755 |
+
" <td>104.0</td>\n",
|
| 756 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}, {'iso...</td>\n",
|
| 757 |
+
" <td>Roll the dice and unleash the excitement!</td>\n",
|
| 758 |
+
" <td>Jumanji</td>\n",
|
| 759 |
+
" </tr>\n",
|
| 760 |
+
" <tr>\n",
|
| 761 |
+
" <th>2</th>\n",
|
| 762 |
+
" <td>False</td>\n",
|
| 763 |
+
" <td>0</td>\n",
|
| 764 |
+
" <td>[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...</td>\n",
|
| 765 |
+
" <td>15602</td>\n",
|
| 766 |
+
" <td>tt0113228</td>\n",
|
| 767 |
+
" <td>en</td>\n",
|
| 768 |
+
" <td>Grumpier Old Men</td>\n",
|
| 769 |
+
" <td>A family wedding reignites the ancient feud be...</td>\n",
|
| 770 |
+
" <td>11.7129</td>\n",
|
| 771 |
+
" <td>/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg</td>\n",
|
| 772 |
+
" <td>[{'name': 'Warner Bros.', 'id': 6194}, {'name'...</td>\n",
|
| 773 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 774 |
+
" <td>1995-12-22</td>\n",
|
| 775 |
+
" <td>0.0</td>\n",
|
| 776 |
+
" <td>101.0</td>\n",
|
| 777 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 778 |
+
" <td>Still Yelling. Still Fighting. Still Ready for...</td>\n",
|
| 779 |
+
" <td>Grumpier Old Men</td>\n",
|
| 780 |
+
" </tr>\n",
|
| 781 |
+
" <tr>\n",
|
| 782 |
+
" <th>3</th>\n",
|
| 783 |
+
" <td>False</td>\n",
|
| 784 |
+
" <td>16000000</td>\n",
|
| 785 |
+
" <td>[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...</td>\n",
|
| 786 |
+
" <td>31357</td>\n",
|
| 787 |
+
" <td>tt0114885</td>\n",
|
| 788 |
+
" <td>en</td>\n",
|
| 789 |
+
" <td>Waiting to Exhale</td>\n",
|
| 790 |
+
" <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
|
| 791 |
+
" <td>3.859495</td>\n",
|
| 792 |
+
" <td>/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg</td>\n",
|
| 793 |
+
" <td>[{'name': 'Twentieth Century Fox Film Corporat...</td>\n",
|
| 794 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 795 |
+
" <td>1995-12-22</td>\n",
|
| 796 |
+
" <td>81452156.0</td>\n",
|
| 797 |
+
" <td>127.0</td>\n",
|
| 798 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 799 |
+
" <td>Friends are the people who let you be yourself...</td>\n",
|
| 800 |
+
" <td>Waiting to Exhale</td>\n",
|
| 801 |
+
" </tr>\n",
|
| 802 |
+
" <tr>\n",
|
| 803 |
+
" <th>4</th>\n",
|
| 804 |
+
" <td>False</td>\n",
|
| 805 |
+
" <td>0</td>\n",
|
| 806 |
+
" <td>[{'id': 35, 'name': 'Comedy'}]</td>\n",
|
| 807 |
+
" <td>11862</td>\n",
|
| 808 |
+
" <td>tt0113041</td>\n",
|
| 809 |
+
" <td>en</td>\n",
|
| 810 |
+
" <td>Father of the Bride Part II</td>\n",
|
| 811 |
+
" <td>Just when George Banks has recovered from his ...</td>\n",
|
| 812 |
+
" <td>8.387519</td>\n",
|
| 813 |
+
" <td>/e64sOI48hQXyru7naBFyssKFxVd.jpg</td>\n",
|
| 814 |
+
" <td>[{'name': 'Sandollar Productions', 'id': 5842}...</td>\n",
|
| 815 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 816 |
+
" <td>1995-02-10</td>\n",
|
| 817 |
+
" <td>76578911.0</td>\n",
|
| 818 |
+
" <td>106.0</td>\n",
|
| 819 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 820 |
+
" <td>Just When His World Is Back To Normal... He's ...</td>\n",
|
| 821 |
+
" <td>Father of the Bride Part II</td>\n",
|
| 822 |
+
" </tr>\n",
|
| 823 |
+
" <tr>\n",
|
| 824 |
+
" <th>...</th>\n",
|
| 825 |
+
" <td>...</td>\n",
|
| 826 |
+
" <td>...</td>\n",
|
| 827 |
+
" <td>...</td>\n",
|
| 828 |
+
" <td>...</td>\n",
|
| 829 |
+
" <td>...</td>\n",
|
| 830 |
+
" <td>...</td>\n",
|
| 831 |
+
" <td>...</td>\n",
|
| 832 |
+
" <td>...</td>\n",
|
| 833 |
+
" <td>...</td>\n",
|
| 834 |
+
" <td>...</td>\n",
|
| 835 |
+
" <td>...</td>\n",
|
| 836 |
+
" <td>...</td>\n",
|
| 837 |
+
" <td>...</td>\n",
|
| 838 |
+
" <td>...</td>\n",
|
| 839 |
+
" <td>...</td>\n",
|
| 840 |
+
" <td>...</td>\n",
|
| 841 |
+
" <td>...</td>\n",
|
| 842 |
+
" <td>...</td>\n",
|
| 843 |
+
" </tr>\n",
|
| 844 |
+
" <tr>\n",
|
| 845 |
+
" <th>45461</th>\n",
|
| 846 |
+
" <td>False</td>\n",
|
| 847 |
+
" <td>0</td>\n",
|
| 848 |
+
" <td>[{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n...</td>\n",
|
| 849 |
+
" <td>439050</td>\n",
|
| 850 |
+
" <td>tt6209470</td>\n",
|
| 851 |
+
" <td>fa</td>\n",
|
| 852 |
+
" <td>رگ خواب</td>\n",
|
| 853 |
+
" <td>Rising and falling between a man and woman.</td>\n",
|
| 854 |
+
" <td>0.072051</td>\n",
|
| 855 |
+
" <td>/jldsYflnId4tTWPx8es3uzsB1I8.jpg</td>\n",
|
| 856 |
+
" <td>[]</td>\n",
|
| 857 |
+
" <td>[{'iso_3166_1': 'IR', 'name': 'Iran'}]</td>\n",
|
| 858 |
+
" <td>NaN</td>\n",
|
| 859 |
+
" <td>0.0</td>\n",
|
| 860 |
+
" <td>90.0</td>\n",
|
| 861 |
+
" <td>[{'iso_639_1': 'fa', 'name': 'فارسی'}]</td>\n",
|
| 862 |
+
" <td>Rising and falling between a man and woman</td>\n",
|
| 863 |
+
" <td>Subdue</td>\n",
|
| 864 |
+
" </tr>\n",
|
| 865 |
+
" <tr>\n",
|
| 866 |
+
" <th>45462</th>\n",
|
| 867 |
+
" <td>False</td>\n",
|
| 868 |
+
" <td>0</td>\n",
|
| 869 |
+
" <td>[{'id': 18, 'name': 'Drama'}]</td>\n",
|
| 870 |
+
" <td>111109</td>\n",
|
| 871 |
+
" <td>tt2028550</td>\n",
|
| 872 |
+
" <td>tl</td>\n",
|
| 873 |
+
" <td>Siglo ng Pagluluwal</td>\n",
|
| 874 |
+
" <td>An artist struggles to finish his work while a...</td>\n",
|
| 875 |
+
" <td>0.178241</td>\n",
|
| 876 |
+
" <td>/xZkmxsNmYXJbKVsTRLLx3pqGHx7.jpg</td>\n",
|
| 877 |
+
" <td>[{'name': 'Sine Olivia', 'id': 19653}]</td>\n",
|
| 878 |
+
" <td>[{'iso_3166_1': 'PH', 'name': 'Philippines'}]</td>\n",
|
| 879 |
+
" <td>2011-11-17</td>\n",
|
| 880 |
+
" <td>0.0</td>\n",
|
| 881 |
+
" <td>360.0</td>\n",
|
| 882 |
+
" <td>[{'iso_639_1': 'tl', 'name': ''}]</td>\n",
|
| 883 |
+
" <td>NaN</td>\n",
|
| 884 |
+
" <td>Century of Birthing</td>\n",
|
| 885 |
+
" </tr>\n",
|
| 886 |
+
" <tr>\n",
|
| 887 |
+
" <th>45463</th>\n",
|
| 888 |
+
" <td>False</td>\n",
|
| 889 |
+
" <td>0</td>\n",
|
| 890 |
+
" <td>[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...</td>\n",
|
| 891 |
+
" <td>67758</td>\n",
|
| 892 |
+
" <td>tt0303758</td>\n",
|
| 893 |
+
" <td>en</td>\n",
|
| 894 |
+
" <td>Betrayal</td>\n",
|
| 895 |
+
" <td>When one of her hits goes wrong, a professiona...</td>\n",
|
| 896 |
+
" <td>0.903007</td>\n",
|
| 897 |
+
" <td>/d5bX92nDsISNhu3ZT69uHwmfCGw.jpg</td>\n",
|
| 898 |
+
" <td>[{'name': 'American World Pictures', 'id': 6165}]</td>\n",
|
| 899 |
+
" <td>[{'iso_3166_1': 'US', 'name': 'United States o...</td>\n",
|
| 900 |
+
" <td>2003-08-01</td>\n",
|
| 901 |
+
" <td>0.0</td>\n",
|
| 902 |
+
" <td>90.0</td>\n",
|
| 903 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 904 |
+
" <td>A deadly game of wits.</td>\n",
|
| 905 |
+
" <td>Betrayal</td>\n",
|
| 906 |
+
" </tr>\n",
|
| 907 |
+
" <tr>\n",
|
| 908 |
+
" <th>45464</th>\n",
|
| 909 |
+
" <td>False</td>\n",
|
| 910 |
+
" <td>0</td>\n",
|
| 911 |
+
" <td>[]</td>\n",
|
| 912 |
+
" <td>227506</td>\n",
|
| 913 |
+
" <td>tt0008536</td>\n",
|
| 914 |
+
" <td>en</td>\n",
|
| 915 |
+
" <td>Satana likuyushchiy</td>\n",
|
| 916 |
+
" <td>In a small town live two brothers, one a minis...</td>\n",
|
| 917 |
+
" <td>0.003503</td>\n",
|
| 918 |
+
" <td>/aorBPO7ak8e8iJKT5OcqYxU3jlK.jpg</td>\n",
|
| 919 |
+
" <td>[{'name': 'Yermoliev', 'id': 88753}]</td>\n",
|
| 920 |
+
" <td>[{'iso_3166_1': 'RU', 'name': 'Russia'}]</td>\n",
|
| 921 |
+
" <td>1917-10-21</td>\n",
|
| 922 |
+
" <td>0.0</td>\n",
|
| 923 |
+
" <td>87.0</td>\n",
|
| 924 |
+
" <td>[]</td>\n",
|
| 925 |
+
" <td>NaN</td>\n",
|
| 926 |
+
" <td>Satan Triumphant</td>\n",
|
| 927 |
+
" </tr>\n",
|
| 928 |
+
" <tr>\n",
|
| 929 |
+
" <th>45465</th>\n",
|
| 930 |
+
" <td>False</td>\n",
|
| 931 |
+
" <td>0</td>\n",
|
| 932 |
+
" <td>[]</td>\n",
|
| 933 |
+
" <td>461257</td>\n",
|
| 934 |
+
" <td>tt6980792</td>\n",
|
| 935 |
+
" <td>en</td>\n",
|
| 936 |
+
" <td>Queerama</td>\n",
|
| 937 |
+
" <td>50 years after decriminalisation of homosexual...</td>\n",
|
| 938 |
+
" <td>0.163015</td>\n",
|
| 939 |
+
" <td>/s5UkZt6NTsrS7ZF0Rh8nzupRlIU.jpg</td>\n",
|
| 940 |
+
" <td>[]</td>\n",
|
| 941 |
+
" <td>[{'iso_3166_1': 'GB', 'name': 'United Kingdom'}]</td>\n",
|
| 942 |
+
" <td>2017-06-09</td>\n",
|
| 943 |
+
" <td>0.0</td>\n",
|
| 944 |
+
" <td>75.0</td>\n",
|
| 945 |
+
" <td>[{'iso_639_1': 'en', 'name': 'English'}]</td>\n",
|
| 946 |
+
" <td>NaN</td>\n",
|
| 947 |
+
" <td>Queerama</td>\n",
|
| 948 |
+
" </tr>\n",
|
| 949 |
+
" </tbody>\n",
|
| 950 |
+
"</table>\n",
|
| 951 |
+
"<p>45466 rows × 18 columns</p>\n",
|
| 952 |
+
"</div>"
|
| 953 |
+
],
|
| 954 |
+
"text/plain": [
|
| 955 |
+
" adult budget genres \\\n",
|
| 956 |
+
"0 False 30000000 [{'id': 16, 'name': 'Animation'}, {'id': 35, '... \n",
|
| 957 |
+
"1 False 65000000 [{'id': 12, 'name': 'Adventure'}, {'id': 14, '... \n",
|
| 958 |
+
"2 False 0 [{'id': 10749, 'name': 'Romance'}, {'id': 35, ... \n",
|
| 959 |
+
"3 False 16000000 [{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam... \n",
|
| 960 |
+
"4 False 0 [{'id': 35, 'name': 'Comedy'}] \n",
|
| 961 |
+
"... ... ... ... \n",
|
| 962 |
+
"45461 False 0 [{'id': 18, 'name': 'Drama'}, {'id': 10751, 'n... \n",
|
| 963 |
+
"45462 False 0 [{'id': 18, 'name': 'Drama'}] \n",
|
| 964 |
+
"45463 False 0 [{'id': 28, 'name': 'Action'}, {'id': 18, 'nam... \n",
|
| 965 |
+
"45464 False 0 [] \n",
|
| 966 |
+
"45465 False 0 [] \n",
|
| 967 |
+
"\n",
|
| 968 |
+
" id imdb_id original_language original_title \\\n",
|
| 969 |
+
"0 862 tt0114709 en Toy Story \n",
|
| 970 |
+
"1 8844 tt0113497 en Jumanji \n",
|
| 971 |
+
"2 15602 tt0113228 en Grumpier Old Men \n",
|
| 972 |
+
"3 31357 tt0114885 en Waiting to Exhale \n",
|
| 973 |
+
"4 11862 tt0113041 en Father of the Bride Part II \n",
|
| 974 |
+
"... ... ... ... ... \n",
|
| 975 |
+
"45461 439050 tt6209470 fa رگ خواب \n",
|
| 976 |
+
"45462 111109 tt2028550 tl Siglo ng Pagluluwal \n",
|
| 977 |
+
"45463 67758 tt0303758 en Betrayal \n",
|
| 978 |
+
"45464 227506 tt0008536 en Satana likuyushchiy \n",
|
| 979 |
+
"45465 461257 tt6980792 en Queerama \n",
|
| 980 |
+
"\n",
|
| 981 |
+
" overview popularity \\\n",
|
| 982 |
+
"0 Led by Woody, Andy's toys live happily in his ... 21.946943 \n",
|
| 983 |
+
"1 When siblings Judy and Peter discover an encha... 17.015539 \n",
|
| 984 |
+
"2 A family wedding reignites the ancient feud be... 11.7129 \n",
|
| 985 |
+
"3 Cheated on, mistreated and stepped on, the wom... 3.859495 \n",
|
| 986 |
+
"4 Just when George Banks has recovered from his ... 8.387519 \n",
|
| 987 |
+
"... ... ... \n",
|
| 988 |
+
"45461 Rising and falling between a man and woman. 0.072051 \n",
|
| 989 |
+
"45462 An artist struggles to finish his work while a... 0.178241 \n",
|
| 990 |
+
"45463 When one of her hits goes wrong, a professiona... 0.903007 \n",
|
| 991 |
+
"45464 In a small town live two brothers, one a minis... 0.003503 \n",
|
| 992 |
+
"45465 50 years after decriminalisation of homosexual... 0.163015 \n",
|
| 993 |
+
"\n",
|
| 994 |
+
" poster_path \\\n",
|
| 995 |
+
"0 /rhIRbceoE9lR4veEXuwCC2wARtG.jpg \n",
|
| 996 |
+
"1 /vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg \n",
|
| 997 |
+
"2 /6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg \n",
|
| 998 |
+
"3 /16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg \n",
|
| 999 |
+
"4 /e64sOI48hQXyru7naBFyssKFxVd.jpg \n",
|
| 1000 |
+
"... ... \n",
|
| 1001 |
+
"45461 /jldsYflnId4tTWPx8es3uzsB1I8.jpg \n",
|
| 1002 |
+
"45462 /xZkmxsNmYXJbKVsTRLLx3pqGHx7.jpg \n",
|
| 1003 |
+
"45463 /d5bX92nDsISNhu3ZT69uHwmfCGw.jpg \n",
|
| 1004 |
+
"45464 /aorBPO7ak8e8iJKT5OcqYxU3jlK.jpg \n",
|
| 1005 |
+
"45465 /s5UkZt6NTsrS7ZF0Rh8nzupRlIU.jpg \n",
|
| 1006 |
+
"\n",
|
| 1007 |
+
" production_companies \\\n",
|
| 1008 |
+
"0 [{'name': 'Pixar Animation Studios', 'id': 3}] \n",
|
| 1009 |
+
"1 [{'name': 'TriStar Pictures', 'id': 559}, {'na... \n",
|
| 1010 |
+
"2 [{'name': 'Warner Bros.', 'id': 6194}, {'name'... \n",
|
| 1011 |
+
"3 [{'name': 'Twentieth Century Fox Film Corporat... \n",
|
| 1012 |
+
"4 [{'name': 'Sandollar Productions', 'id': 5842}... \n",
|
| 1013 |
+
"... ... \n",
|
| 1014 |
+
"45461 [] \n",
|
| 1015 |
+
"45462 [{'name': 'Sine Olivia', 'id': 19653}] \n",
|
| 1016 |
+
"45463 [{'name': 'American World Pictures', 'id': 6165}] \n",
|
| 1017 |
+
"45464 [{'name': 'Yermoliev', 'id': 88753}] \n",
|
| 1018 |
+
"45465 [] \n",
|
| 1019 |
+
"\n",
|
| 1020 |
+
" production_countries release_date \\\n",
|
| 1021 |
+
"0 [{'iso_3166_1': 'US', 'name': 'United States o... 1995-10-30 \n",
|
| 1022 |
+
"1 [{'iso_3166_1': 'US', 'name': 'United States o... 1995-12-15 \n",
|
| 1023 |
+
"2 [{'iso_3166_1': 'US', 'name': 'United States o... 1995-12-22 \n",
|
| 1024 |
+
"3 [{'iso_3166_1': 'US', 'name': 'United States o... 1995-12-22 \n",
|
| 1025 |
+
"4 [{'iso_3166_1': 'US', 'name': 'United States o... 1995-02-10 \n",
|
| 1026 |
+
"... ... ... \n",
|
| 1027 |
+
"45461 [{'iso_3166_1': 'IR', 'name': 'Iran'}] NaN \n",
|
| 1028 |
+
"45462 [{'iso_3166_1': 'PH', 'name': 'Philippines'}] 2011-11-17 \n",
|
| 1029 |
+
"45463 [{'iso_3166_1': 'US', 'name': 'United States o... 2003-08-01 \n",
|
| 1030 |
+
"45464 [{'iso_3166_1': 'RU', 'name': 'Russia'}] 1917-10-21 \n",
|
| 1031 |
+
"45465 [{'iso_3166_1': 'GB', 'name': 'United Kingdom'}] 2017-06-09 \n",
|
| 1032 |
+
"\n",
|
| 1033 |
+
" revenue runtime \\\n",
|
| 1034 |
+
"0 373554033.0 81.0 \n",
|
| 1035 |
+
"1 262797249.0 104.0 \n",
|
| 1036 |
+
"2 0.0 101.0 \n",
|
| 1037 |
+
"3 81452156.0 127.0 \n",
|
| 1038 |
+
"4 76578911.0 106.0 \n",
|
| 1039 |
+
"... ... ... \n",
|
| 1040 |
+
"45461 0.0 90.0 \n",
|
| 1041 |
+
"45462 0.0 360.0 \n",
|
| 1042 |
+
"45463 0.0 90.0 \n",
|
| 1043 |
+
"45464 0.0 87.0 \n",
|
| 1044 |
+
"45465 0.0 75.0 \n",
|
| 1045 |
+
"\n",
|
| 1046 |
+
" spoken_languages \\\n",
|
| 1047 |
+
"0 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1048 |
+
"1 [{'iso_639_1': 'en', 'name': 'English'}, {'iso... \n",
|
| 1049 |
+
"2 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1050 |
+
"3 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1051 |
+
"4 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1052 |
+
"... ... \n",
|
| 1053 |
+
"45461 [{'iso_639_1': 'fa', 'name': 'فارسی'}] \n",
|
| 1054 |
+
"45462 [{'iso_639_1': 'tl', 'name': ''}] \n",
|
| 1055 |
+
"45463 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1056 |
+
"45464 [] \n",
|
| 1057 |
+
"45465 [{'iso_639_1': 'en', 'name': 'English'}] \n",
|
| 1058 |
+
"\n",
|
| 1059 |
+
" tagline \\\n",
|
| 1060 |
+
"0 NaN \n",
|
| 1061 |
+
"1 Roll the dice and unleash the excitement! \n",
|
| 1062 |
+
"2 Still Yelling. Still Fighting. Still Ready for... \n",
|
| 1063 |
+
"3 Friends are the people who let you be yourself... \n",
|
| 1064 |
+
"4 Just When His World Is Back To Normal... He's ... \n",
|
| 1065 |
+
"... ... \n",
|
| 1066 |
+
"45461 Rising and falling between a man and woman \n",
|
| 1067 |
+
"45462 NaN \n",
|
| 1068 |
+
"45463 A deadly game of wits. \n",
|
| 1069 |
+
"45464 NaN \n",
|
| 1070 |
+
"45465 NaN \n",
|
| 1071 |
+
"\n",
|
| 1072 |
+
" title \n",
|
| 1073 |
+
"0 Toy Story \n",
|
| 1074 |
+
"1 Jumanji \n",
|
| 1075 |
+
"2 Grumpier Old Men \n",
|
| 1076 |
+
"3 Waiting to Exhale \n",
|
| 1077 |
+
"4 Father of the Bride Part II \n",
|
| 1078 |
+
"... ... \n",
|
| 1079 |
+
"45461 Subdue \n",
|
| 1080 |
+
"45462 Century of Birthing \n",
|
| 1081 |
+
"45463 Betrayal \n",
|
| 1082 |
+
"45464 Satan Triumphant \n",
|
| 1083 |
+
"45465 Queerama \n",
|
| 1084 |
+
"\n",
|
| 1085 |
+
"[45466 rows x 18 columns]"
|
| 1086 |
+
]
|
| 1087 |
+
},
|
| 1088 |
+
"execution_count": 22,
|
| 1089 |
+
"metadata": {},
|
| 1090 |
+
"output_type": "execute_result"
|
| 1091 |
+
}
|
| 1092 |
+
],
|
| 1093 |
+
"source": [
|
| 1094 |
+
"df"
|
| 1095 |
+
]
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"cell_type": "code",
|
| 1099 |
+
"execution_count": 23,
|
| 1100 |
+
"id": "0f83fc57-1c94-44a1-a4b1-b51ca334097f",
|
| 1101 |
+
"metadata": {},
|
| 1102 |
+
"outputs": [
|
| 1103 |
+
{
|
| 1104 |
+
"data": {
|
| 1105 |
+
"text/plain": [
|
| 1106 |
+
"count 45461\n",
|
| 1107 |
+
"unique 44176\n",
|
| 1108 |
+
"top 0.0\n",
|
| 1109 |
+
"freq 34\n",
|
| 1110 |
+
"Name: popularity, dtype: object"
|
| 1111 |
+
]
|
| 1112 |
+
},
|
| 1113 |
+
"execution_count": 23,
|
| 1114 |
+
"metadata": {},
|
| 1115 |
+
"output_type": "execute_result"
|
| 1116 |
+
}
|
| 1117 |
+
],
|
| 1118 |
+
"source": [
|
| 1119 |
+
"df['popularity'].describe()"
|
| 1120 |
+
]
|
| 1121 |
+
},
|
| 1122 |
+
{
|
| 1123 |
+
"cell_type": "code",
|
| 1124 |
+
"execution_count": 24,
|
| 1125 |
+
"id": "e693550c-9eed-47ed-8de9-402919a59228",
|
| 1126 |
+
"metadata": {},
|
| 1127 |
+
"outputs": [
|
| 1128 |
+
{
|
| 1129 |
+
"data": {
|
| 1130 |
+
"text/html": [
|
| 1131 |
+
"<div>\n",
|
| 1132 |
+
"<style scoped>\n",
|
| 1133 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 1134 |
+
" vertical-align: middle;\n",
|
| 1135 |
+
" }\n",
|
| 1136 |
+
"\n",
|
| 1137 |
+
" .dataframe tbody tr th {\n",
|
| 1138 |
+
" vertical-align: top;\n",
|
| 1139 |
+
" }\n",
|
| 1140 |
+
"\n",
|
| 1141 |
+
" .dataframe thead th {\n",
|
| 1142 |
+
" text-align: right;\n",
|
| 1143 |
+
" }\n",
|
| 1144 |
+
"</style>\n",
|
| 1145 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 1146 |
+
" <thead>\n",
|
| 1147 |
+
" <tr style=\"text-align: right;\">\n",
|
| 1148 |
+
" <th></th>\n",
|
| 1149 |
+
" <th>revenue</th>\n",
|
| 1150 |
+
" <th>runtime</th>\n",
|
| 1151 |
+
" </tr>\n",
|
| 1152 |
+
" </thead>\n",
|
| 1153 |
+
" <tbody>\n",
|
| 1154 |
+
" <tr>\n",
|
| 1155 |
+
" <th>count</th>\n",
|
| 1156 |
+
" <td>4.546000e+04</td>\n",
|
| 1157 |
+
" <td>45203.000000</td>\n",
|
| 1158 |
+
" </tr>\n",
|
| 1159 |
+
" <tr>\n",
|
| 1160 |
+
" <th>mean</th>\n",
|
| 1161 |
+
" <td>1.120935e+07</td>\n",
|
| 1162 |
+
" <td>94.128199</td>\n",
|
| 1163 |
+
" </tr>\n",
|
| 1164 |
+
" <tr>\n",
|
| 1165 |
+
" <th>std</th>\n",
|
| 1166 |
+
" <td>6.433225e+07</td>\n",
|
| 1167 |
+
" <td>38.407810</td>\n",
|
| 1168 |
+
" </tr>\n",
|
| 1169 |
+
" <tr>\n",
|
| 1170 |
+
" <th>min</th>\n",
|
| 1171 |
+
" <td>0.000000e+00</td>\n",
|
| 1172 |
+
" <td>0.000000</td>\n",
|
| 1173 |
+
" </tr>\n",
|
| 1174 |
+
" <tr>\n",
|
| 1175 |
+
" <th>25%</th>\n",
|
| 1176 |
+
" <td>0.000000e+00</td>\n",
|
| 1177 |
+
" <td>85.000000</td>\n",
|
| 1178 |
+
" </tr>\n",
|
| 1179 |
+
" <tr>\n",
|
| 1180 |
+
" <th>50%</th>\n",
|
| 1181 |
+
" <td>0.000000e+00</td>\n",
|
| 1182 |
+
" <td>95.000000</td>\n",
|
| 1183 |
+
" </tr>\n",
|
| 1184 |
+
" <tr>\n",
|
| 1185 |
+
" <th>75%</th>\n",
|
| 1186 |
+
" <td>0.000000e+00</td>\n",
|
| 1187 |
+
" <td>107.000000</td>\n",
|
| 1188 |
+
" </tr>\n",
|
| 1189 |
+
" <tr>\n",
|
| 1190 |
+
" <th>max</th>\n",
|
| 1191 |
+
" <td>2.787965e+09</td>\n",
|
| 1192 |
+
" <td>1256.000000</td>\n",
|
| 1193 |
+
" </tr>\n",
|
| 1194 |
+
" </tbody>\n",
|
| 1195 |
+
"</table>\n",
|
| 1196 |
+
"</div>"
|
| 1197 |
+
],
|
| 1198 |
+
"text/plain": [
|
| 1199 |
+
" revenue runtime\n",
|
| 1200 |
+
"count 4.546000e+04 45203.000000\n",
|
| 1201 |
+
"mean 1.120935e+07 94.128199\n",
|
| 1202 |
+
"std 6.433225e+07 38.407810\n",
|
| 1203 |
+
"min 0.000000e+00 0.000000\n",
|
| 1204 |
+
"25% 0.000000e+00 85.000000\n",
|
| 1205 |
+
"50% 0.000000e+00 95.000000\n",
|
| 1206 |
+
"75% 0.000000e+00 107.000000\n",
|
| 1207 |
+
"max 2.787965e+09 1256.000000"
|
| 1208 |
+
]
|
| 1209 |
+
},
|
| 1210 |
+
"execution_count": 24,
|
| 1211 |
+
"metadata": {},
|
| 1212 |
+
"output_type": "execute_result"
|
| 1213 |
+
}
|
| 1214 |
+
],
|
| 1215 |
+
"source": [
|
| 1216 |
+
"df.describe()"
|
| 1217 |
+
]
|
| 1218 |
+
},
|
| 1219 |
+
{
|
| 1220 |
+
"cell_type": "code",
|
| 1221 |
+
"execution_count": null,
|
| 1222 |
+
"id": "9ae3ea42-2343-4f8a-9740-adf7a11d7d5f",
|
| 1223 |
+
"metadata": {},
|
| 1224 |
+
"outputs": [],
|
| 1225 |
+
"source": [
|
| 1226 |
+
"df.drop("
|
| 1227 |
+
]
|
| 1228 |
+
}
|
| 1229 |
+
],
|
| 1230 |
+
"metadata": {
|
| 1231 |
+
"kernelspec": {
|
| 1232 |
+
"display_name": "Python 3 (ipykernel)",
|
| 1233 |
+
"language": "python",
|
| 1234 |
+
"name": "python3"
|
| 1235 |
+
},
|
| 1236 |
+
"language_info": {
|
| 1237 |
+
"codemirror_mode": {
|
| 1238 |
+
"name": "ipython",
|
| 1239 |
+
"version": 3
|
| 1240 |
+
},
|
| 1241 |
+
"file_extension": ".py",
|
| 1242 |
+
"mimetype": "text/x-python",
|
| 1243 |
+
"name": "python",
|
| 1244 |
+
"nbconvert_exporter": "python",
|
| 1245 |
+
"pygments_lexer": "ipython3",
|
| 1246 |
+
"version": "3.11.14"
|
| 1247 |
+
}
|
| 1248 |
+
},
|
| 1249 |
+
"nbformat": 4,
|
| 1250 |
+
"nbformat_minor": 5
|
| 1251 |
+
}
|
main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.preprocessing import parse_features
|
| 2 |
+
from src.recommender import MovieRecommender
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
# Step 1 - input location.
# The raw metadata CSV is expected at exactly this relative path.
csv_path = 'data/movies_metadata.csv'

# Step 2 - preprocessing.
# Loads the CSV, drops rows with unusable IDs and returns the cleaned
# frame (id, title, soup) that gets embedded below.
df = parse_features(csv_path)

# Step 3 - create the recommender (this loads the sentence encoder).
rec = MovieRecommender()

# Step 4 - embed every movie and build the index.
# This is the slow part of the run, so it is timed.
start_time = time.time()
rec.preprocess_and_embed(df)
elapsed_minutes = (time.time() - start_time) / 60
print(f"Total time taken: {elapsed_minutes:.2f} minutes")

# Step 5 - persist index + metadata so later runs can just load them.
rec.save('models/')

# Step 6 - smoke test against a title known to be in the dataset.
print("\n--- Test Recommendation ---")
print(rec.recommend_by_movie("Jumanji"))
|
models/metadata.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78472dfc4fcf4703b633dc55e20c3ac24dcb627e0ddef239d63d89314cf56716
|
| 3 |
+
size 19236982
|
models/movie_index.faiss
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f489fa4ce9d3a757267af4dacb3b9d3f7339bb9ebf798e0403e58fe14d2171a8
|
| 3 |
+
size 69970989
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas>=1.3.0
|
| 2 |
+
numpy>=1.21.0
|
| 3 |
+
requests>=2.26.0
|
| 4 |
+
sentence-transformers>=2.2.0
|
| 5 |
+
faiss-cpu>=1.7.2
|
src/__init__.py
ADDED
|
File without changes
|
src/__pycache__/preprocessing.cpython-311.pyc
ADDED
|
Binary file (3.13 kB). View file
|
|
|
src/__pycache__/preprocessing.cpython-312.pyc
ADDED
|
Binary file (2.63 kB). View file
|
|
|
src/__pycache__/recommender.cpython-311.pyc
ADDED
|
Binary file (8.52 kB). View file
|
|
|
src/ingest.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
import os
|
| 3 |
+
import requests
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from src.recommender import MovieRecommender
|
| 6 |
+
|
| 7 |
+
# Load variables from a local .env file (if any) before they are read below.
load_dotenv()

# CONFIGURATION
# API key is taken from the environment; it is None when the variable is unset.
API_KEY = os.environ.get('API_KEY')
BASE_URL = "https://api.themoviedb.org/3"
|
| 12 |
+
|
| 13 |
+
def get_genre_map():
    """
    Fetch the official genre list and return it as an ID -> name lookup.

    Example: {28: 'Action', 12: 'Adventure'}

    Returns:
        dict: genre ID mapped to genre name. An empty dict is returned when
        the request fails, the server answers with an HTTP error, or the
        payload does not have the expected shape.
    """
    url = f"{BASE_URL}/genre/movie/list"
    params = {'api_key': API_KEY, 'language': 'en-US'}

    try:
        # Without a timeout a stalled connection would hang the update forever.
        response = requests.get(url, params=params, timeout=10)
        # Report HTTP errors (bad key, rate limit, ...) as what they are
        # instead of as a confusing missing-'genres' KeyError.
        response.raise_for_status()
        data = response.json()
        # Turn the list of {'id': ..., 'name': ...} dicts into one lookup dict.
        mapping = {g['id']: g['name'] for g in data['genres']}
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print(f"Failed to load genres: {e}")
        return {}

    print("Genre map loaded.")
    return mapping
|
| 31 |
+
|
| 32 |
+
def get_popular_movies(limit=100):
    """
    Fetch the current most popular movies.

    Args:
        limit: maximum number of movies to return. A value of zero or less
            returns an empty list without contacting the API.

    Returns:
        list: raw result dicts from the API, at most ``limit`` of them.
        Pages that fail are reported on stdout and skipped, so fewer than
        ``limit`` entries may come back.
    """
    movies = []

    print(f"Fetching top {limit} popular movies...")

    if limit <= 0:
        # Nothing was asked for; do not spend a request on it.
        return movies

    # The page arithmetic assumes 20 results per page. The "+ 1" leaves one
    # spare page so a failed or short page can still be compensated for.
    pages = (limit // 20) + 1
    url = f"{BASE_URL}/movie/popular"

    for page in range(1, pages + 1):
        params = {
            'api_key': API_KEY,
            'language': 'en-US',
            'page': page,
        }

        try:
            # Timeout so one stalled page cannot block the whole update.
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                movies.extend(response.json().get('results', []))
            else:
                print(f"Error on page {page}: {response.status_code}")
        except (requests.RequestException, ValueError) as e:
            # Best effort: report and move on to the next page.
            print(f"Connection failed: {e}")

        if len(movies) >= limit:
            break

    return movies[:limit]
|
| 63 |
+
|
| 64 |
+
def run_weekly_update():
    """
    Add currently popular movies that are not yet in the saved index.

    Loads the persisted recommender from ``models/``, fetches the popular
    list and the genre lookup, embeds every movie whose ID is not already
    known, and writes the updated index back to ``models/``. Returns early
    (without saving) when the model cannot be loaded or nothing new is found.
    """
    # 1. Load the existing index and metadata
    print("Loading existing database...")
    rec = MovieRecommender()
    try:
        rec.load('models/')
        existing_ids = set(rec.movies['id'].values)
    except Exception as e:
        # Script-level boundary: report the real cause instead of hiding it,
        # and let KeyboardInterrupt / SystemExit propagate.
        print("No existing model found. Cannot update empty model.")
        print(f"Reason: {e}")
        return
    print(f"Database currently holds {len(existing_ids)} movies.")

    # 2. Get Data
    candidates = get_popular_movies(limit=100)
    genre_map = get_genre_map()

    # 3. Filter Duplicates
    # Skip movies already indexed, movies repeated within this batch, and
    # entries that lack the id/title needed to index them.
    new_movies_to_process = []
    seen_ids = set()
    for m in candidates:
        movie_id = m.get('id')
        if movie_id is None or not m.get('title'):
            continue
        if movie_id in existing_ids or movie_id in seen_ids:
            continue
        seen_ids.add(movie_id)
        new_movies_to_process.append(m)

    count = len(new_movies_to_process)
    print(f"New additions found: {count}")

    if count == 0:
        return

    # 4. Compute & Ingest
    print(f"Processing {count} new movies...")

    for m in new_movies_to_process:
        # Resolve genre IDs such as [28, 12] to "Action Adventure";
        # IDs missing from the lookup are dropped rather than left as blanks.
        genre_ids = m.get('genre_ids') or []
        genre_str = " ".join(genre_map[gid] for gid in genre_ids if gid in genre_map)

        # Soup structure: Title + Title + Genres + Overview
        # (`or ''` keeps a null overview from becoming the text "None").
        overview = m.get('overview') or ''
        soup = f"{m['title']} {m['title']} {genre_str} {overview}"

        movie_data = {
            'id': m['id'],
            'title': m['title'],
            'soup': soup,
        }

        rec.add_new_movie(movie_data)
        print(f"   - Added: {m['title']} ({genre_str})")

    # 5. Save
    print("Saving updated index to disk...")
    rec.save('models/')
    print("Update Complete!")


if __name__ == "__main__":
    run_weekly_update()
|
src/preprocessing.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import ast
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
def clean_data(x):
    """
    Convert a stringified list such as "[{'name': 'Action'}]" into the
    plain string "Action".

    Args:
        x: cell value; expected to be the text of a Python list of dicts.

    Returns:
        str: the 'name' values joined by single spaces. An empty string is
        returned when ``x`` is not a string, cannot be parsed as a literal,
        or does not parse to a list. List entries that are not dicts, or
        whose 'name' is missing or not a string, are ignored.
    """
    if not isinstance(x, str):
        return ""

    try:
        # literal_eval only accepts Python literals, so it is safe on raw text.
        parsed = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return ""

    if not isinstance(parsed, list):
        return ""

    # Guard on dict/str so stray scalars in the list cannot raise here.
    names = [
        entry['name']
        for entry in parsed
        if isinstance(entry, dict) and isinstance(entry.get('name'), str)
    ]
    return ' '.join(names)
|
| 20 |
+
|
| 21 |
+
def parse_features(data_path):
    """
    Load the movie metadata CSV and build the text "soup" used for embedding.

    Args:
        data_path: path to the metadata CSV. It must provide the columns
            'id', 'title', 'overview', 'tagline' and 'genres'.

    Returns:
        pandas.DataFrame: columns 'id' (int), 'title' and 'soup', with a
        fresh 0..n-1 index. Rows whose 'id' is not numeric are dropped.
    """
    print(f"Loading data from {data_path}...")

    # 1. Load Data
    df = pd.read_csv(data_path, low_memory=False)

    # 2. Filter Bad IDs
    # This dataset has a known bug where some IDs are dates (e.g., '1995-10-20').
    # Coerce 'id' to numeric and drop the rows that fail. The explicit copy
    # makes the filtered frame independent, so the assignments below do not
    # trigger pandas' chained-assignment warning.
    df['id'] = pd.to_numeric(df['id'], errors='coerce')
    df = df.dropna(subset=['id']).copy()
    df['id'] = df['id'].astype(int)

    # 3. Fill NaNs
    for column in ('title', 'overview', 'tagline'):
        df[column] = df[column].fillna('')
    df['genres'] = df['genres'].fillna('[]')

    print("Parsing genres (this might take a moment)...")
    # 4. Clean Genres
    df['genre_names'] = df['genres'].apply(clean_data)

    # 5. Create the "Soup"
    # Title (2x weight), Tagline, Overview and Genres, space separated.
    # Column-wise concatenation replaces a per-row apply; astype(str) mirrors
    # the string conversion an f-string would perform on each value.
    title = df['title'].astype(str)
    df['soup'] = (
        title + ' ' + title + ' '
        + df['tagline'].astype(str) + ' '
        + df['overview'].astype(str) + ' '
        + df['genre_names'].astype(str)
    )

    # 6. Return only what we need to save memory
    final_df = df[['id', 'title', 'soup']].reset_index(drop=True)

    print(f"Cleaned data: {len(final_df)} movies ready for embedding.")
    return final_df
|
src/recommender.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import faiss
|
| 4 |
+
from sentence_transformers import SentenceTransformer
|
| 5 |
+
import pickle
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
class MovieRecommender:
    """
    Content-based movie recommender built on sentence embeddings and FAISS.

    Each movie's "soup" text is embedded, L2-normalised and stored in an
    inner-product index, so search scores are cosine similarities. Row
    position ``i`` in ``self.movies`` corresponds to vector ``i`` in
    ``self.index``.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """
        Args:
            model_name: sentence-transformers model used to embed text.
        """
        # Load the Transformer model
        self.encoder = SentenceTransformer(model_name)
        # Embedding width. 384 matches the default MiniLM model; the value is
        # replaced by the real width once embeddings are built or an index
        # is loaded.
        self.d = 384
        self.index = None
        self.movies = pd.DataFrame()
        self.id_map = {}  # Maps FAISS index position to Movie ID

    def _build_id_map(self):
        """Return {row position: movie id} for the current metadata frame."""
        return dict(enumerate(self.movies['id'].tolist()))

    def _find_movie(self, title):
        """
        Return the first metadata row matching ``title``, or None.

        An exact case-insensitive title match is preferred; otherwise the
        first title containing ``title`` as a literal (non-regex) substring
        is used.
        """
        if self.movies.empty or 'title' not in self.movies.columns:
            return None

        # Missing titles become '' so they can neither match nor break the mask.
        titles = self.movies['title'].fillna('').astype(str)
        query = str(title)

        hits = titles.str.lower() == query.lower()
        if not hits.any():
            # regex=False: user text such as "(500) Days" must not be parsed
            # as a regular expression.
            hits = titles.str.contains(query, case=False, regex=False, na=False)
        if not hits.any():
            return None
        return self.movies[hits].iloc[0]

    def preprocess_and_embed(self, df):
        """
        Embed every row of ``df`` and (re)build the index from scratch.

        Args:
            df: frame with at least the columns 'id', 'title' and 'soup'.
        """
        self.movies = df.reset_index(drop=True)

        # Explicit batch size keeps memory use bounded on CPU and drives the
        # progress bar.
        batch_size = 64
        print(f"Generating embeddings on CPU in batches of {batch_size}...")

        embeddings = self.encoder.encode(
            self.movies['soup'].tolist(),
            batch_size=batch_size,
            show_progress_bar=True,
            convert_to_numpy=True,
        )
        # FAISS works on C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
        has_rows = embeddings.ndim == 2 and len(embeddings) > 0

        print("Normalizing vectors...")
        if has_rows:
            faiss.normalize_L2(embeddings)
            # Take the width from the data so any encoder model works.
            self.d = embeddings.shape[1]

        print("Building Index...")
        # Exact (brute-force) inner-product index; on unit vectors the inner
        # product equals cosine similarity.
        self.index = faiss.IndexFlatIP(self.d)
        if has_rows:
            self.index.add(embeddings)

        self.id_map = self._build_id_map()
        print(f"Done. Indexed {self.index.ntotal} movies.")

    def search(self, query_vector, k=5):
        """
        Low-level search: Input vector -> Top K movies.

        Args:
            query_vector: embedding of shape (d,) or (1, d). It is copied,
                so the caller's array is left untouched.
            k: number of neighbours to request.

        Returns:
            list of dicts with 'movie_id', 'score' (cosine similarity) and
            'title', best match first.
        """
        # Private float32 copy shaped (1, d); normalize_L2 works in place.
        query = np.array(query_vector, dtype=np.float32, order='C').reshape(1, -1)
        faiss.normalize_L2(query)

        distances, indices = self.index.search(query, k)

        results = []
        for score, idx in zip(distances[0], indices[0]):
            if idx == -1:  # FAISS pads with -1 when fewer than k hits exist
                continue
            position = int(idx)
            results.append({
                "movie_id": int(self.id_map[position]),
                "score": float(score),
                # Vector position == metadata row position, so no scan needed.
                "title": self.movies.iloc[position]['title'],
            })
        return results

    def recommend_by_movie(self, movie_title, k=5):
        """
        Find movies similar to a specific movie title already in DB.

        Returns:
            list of result dicts (see ``search``), or the string
            "Movie not found." when no title matches.
        """
        movie_row = self._find_movie(movie_title)
        if movie_row is None:
            return "Movie not found."

        # The vectors are not cached in the frame, so re-encode the soup.
        vec = self.encoder.encode([movie_row['soup']])
        return self.search(vec, k)

    def recommend_on_text(self, text_query, k=5):
        """
        Recommends movies based on a raw text description.
        Example: "A romantic movie about a sinking ship" -> Titanic
        """
        print(f"Searching for: '{text_query}'...")

        # Encode the user's text; search() takes care of normalisation.
        query_vector = self.encoder.encode([text_query])
        return self.search(query_vector, k)

    def recommend_for_user(self, liked_movie_titles, k=5):
        """
        The Personalized Logic.
        1. Get vectors for all movies the user liked.
        2. Average them to create a 'User Profile Vector'.
        3. Search against the index.

        Returns:
            list of result dicts (see ``search``), or the string
            "No known movies provided." when none of the titles is known.
        """
        soups = []
        for title in liked_movie_titles:
            movie_row = self._find_movie(title)
            if movie_row is not None:
                soups.append(movie_row['soup'])

        if not soups:
            return "No known movies provided."

        # One batched encode call, then average into the user profile vector.
        vectors = self.encoder.encode(soups)
        user_vector = np.mean(vectors, axis=0)
        return self.search(user_vector, k)

    def add_new_movie(self, movie_dict):
        """
        Incremental Learning: Add a movie without retraining.
        movie_dict: {'id': 123, 'title': 'New Movie', 'soup': 'description...'}
        """
        # 1. Vectorize the new soup
        new_vec = self.encoder.encode([movie_dict['soup']])
        new_vec = np.ascontiguousarray(new_vec, dtype=np.float32)
        faiss.normalize_L2(new_vec)

        # 2. Add to FAISS Index; the new vector lands at the current end.
        position = self.index.ntotal
        self.index.add(new_vec)

        # 3. Append the matching metadata row
        new_row = pd.DataFrame([movie_dict])
        self.movies = pd.concat([self.movies, new_row], ignore_index=True)

        # 4. Update ID Map, keyed by the vector's position in the index
        self.id_map[position] = movie_dict['id']

    def save(self, path='models/'):
        """Persist the index and metadata into ``path`` (created if needed)."""
        os.makedirs(path, exist_ok=True)
        faiss.write_index(self.index, os.path.join(path, 'movie_index.faiss'))
        self.movies.to_pickle(os.path.join(path, 'metadata.pkl'))
        # Note: We don't save the encoder, we load it fresh every time.

    def load(self, path='models/'):
        """Restore the index and metadata previously written by ``save``."""
        self.index = faiss.read_index(os.path.join(path, 'movie_index.faiss'))
        # NOTE(security): unpickling executes arbitrary code; only load
        # metadata.pkl files that come from a trusted source.
        self.movies = pd.read_pickle(os.path.join(path, 'metadata.pkl'))
        self.d = self.index.d
        self.id_map = self._build_id_map()
|
test.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.recommender import MovieRecommender
|
| 2 |
+
|
| 3 |
+
# Restore the persisted index and metadata from the models/ directory.
rec = MovieRecommender()
rec.load('models/')

# Example 1: Vague description
results = rec.recommend_on_text("running man")
print(results)

# Further queries to try (disabled):
#
# Example 2: Specific vibe
# print(rec.recommend_on_text("mind bending sci-fi about dreams within dreams"))
# Expected: Inception
#
# Example 3: Emotional query
# print(rec.recommend_on_text("sad movie that makes me cry about a dog"))
# Expected: Hachi: A Dog's Tale or Marley & Me
|