File size: 7,224 Bytes
e885901
 
 
 
 
 
502c283
e885901
502c283
e885901
 
502c283
e885901
502c283
e885901
 
502c283
e885901
 
 
502c283
e885901
 
502c283
e885901
 
 
502c283
e885901
 
 
 
 
 
 
 
502c283
e885901
 
 
 
 
 
 
 
 
502c283
e885901
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502c283
 
 
e885901
 
 
 
 
 
 
502c283
 
e885901
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import requests
import gradio as gr
import os

# Load the ratings and movie metadata tables from CSV files in the working directory.
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

# OMDb API key taken from the environment; this is None when the variable is unset.
OMDB_API_KEY = os.environ.get("omdbapikey")

# movieId -> title, and lower-cased title -> movieId for case-insensitive lookups.
# NOTE: titles that are identical after lower-casing collapse into one entry
# (the last one iterated wins).
movie_lookup = movies.set_index("movieId")["title"].to_dict()
reverse_movie_lookup = {v.lower(): k for k, v in movie_lookup.items()}

# Hold out 20% of the ratings; only the training split feeds the similarity
# matrices below. test_df is not referenced again in the visible code.
train_df, test_df = train_test_split(ratings, test_size=0.2, random_state=42)
# userId x movieId rating matrix; cells with no rating are NaN in train_matrix
# and 0 in train_matrix_filled (the version passed to cosine_similarity).
train_matrix = train_df.pivot_table(index='userId', columns='movieId', values='rating')
train_matrix_filled = train_matrix.fillna(0)

# Pairwise cosine similarity between users (rows of the filled matrix),
# labelled on both axes by userId.
user_similarity = cosine_similarity(train_matrix_filled)
user_similarity_df = pd.DataFrame(user_similarity, index=train_matrix_filled.index, columns=train_matrix_filled.index)

# Pairwise cosine similarity between movies (rows of the transposed matrix),
# labelled on both axes by movieId.
item_rating_matrix = train_matrix_filled.T
item_similarity = cosine_similarity(item_rating_matrix)
item_similarity_df = pd.DataFrame(item_similarity, index=item_rating_matrix.index, columns=item_rating_matrix.index)

# Genre features for content-based recommendations. The TF-IDF is fitted on the
# ratings/movies join, so there is one row per rating rather than one per movie.
data = pd.merge(ratings, movies, on='movieId')
data['genres'] = data['genres'].fillna('')
# The token pattern keeps letters, digits and hyphens together, so a hyphenated
# genre name stays a single token.
vectorizer = TfidfVectorizer(token_pattern=r'[a-zA-Z0-9\-]+')
tfidf_matrix = vectorizer.fit_transform(data['genres'].values)
movie_ids = data['movieId'].values
# np.unique yields the sorted distinct movieIds plus the row index of each one's
# first occurrence; those indices pick one TF-IDF row per movie.
unique_movie_ids, indices = np.unique(movie_ids, return_index=True)
# movieId -> row position in movie_genre_matrix.
movie_id_to_index = {mid: idx for idx, mid in enumerate(unique_movie_ids)}
movie_genre_matrix = tfidf_matrix[indices]

def get_movie_poster(title):
    """Look up a poster URL for *title* through the OMDb API.

    This is a best-effort helper: network, HTTP-transport and JSON decoding
    problems yield an empty string instead of an exception.

    Args:
        title: Movie title, sent as the ``t`` query parameter.

    Returns:
        The ``Poster`` value of the OMDb JSON response, or ``''`` when no API
        key is configured, the request fails, the response is not a JSON
        object, or no usable poster is reported.
    """
    if not OMDB_API_KEY:
        return ''
    try:
        # `params` URL-encodes the title, so characters such as '&', '#' or
        # spaces cannot corrupt the query string. HTTPS keeps the API key out
        # of clear text, and the timeout stops a stalled request from
        # blocking the chat handler indefinitely.
        response = requests.get(
            "https://www.omdbapi.com/",
            params={"t": title, "apikey": OMDB_API_KEY},
            timeout=5,
        )
        payload = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a response body that is not valid JSON.
        return ''
    if not isinstance(payload, dict):
        return ''
    poster = payload.get('Poster', '')
    # OMDb uses the literal string "N/A" for missing fields; treat it as
    # "no poster" so callers do not render a broken image.
    if not poster or poster == 'N/A':
        return ''
    return poster

def user_cf_recommend(user_id):
    """Recommend up to five unseen movies for a user via user-based CF.

    The ten most similar users (by ``user_similarity_df``) vote on every movie
    the target user has not rated in ``train_matrix``; each predicted rating is
    the similarity-weighted mean of the neighbours' ratings.

    Args:
        user_id: User identifier; anything ``int()`` accepts.

    Returns:
        A list of ``(title, predicted_rating, poster_url)`` tuples sorted by
        predicted rating (highest first), or an error string:
        ``"Invalid input."`` when *user_id* cannot be converted to ``int`` and
        ``"User ID not found."`` when the user is absent from the training data.
    """
    # Only the conversion is guarded: a blanket handler here would also
    # relabel genuine programming errors as "Invalid input.".
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        return "Invalid input."

    if user_id not in user_similarity_df.index:
        return "User ID not found."

    similar_users = user_similarity_df[user_id].drop(user_id)
    top_similar_users = similar_users.sort_values(ascending=False).head(10)

    # Compute the target user's rated movies once instead of doing a
    # DataFrame lookup for every (neighbour, movie) pair.
    already_rated = set(train_matrix.loc[user_id].dropna().index)

    scores = {}
    sim_sums = {}
    for other_user, similarity in top_similar_users.items():
        other_ratings = train_matrix.loc[other_user].dropna()
        for movie_id, rating in other_ratings.items():
            if movie_id in already_rated:
                continue
            scores[movie_id] = scores.get(movie_id, 0) + similarity * rating
            sim_sums[movie_id] = sim_sums.get(movie_id, 0) + abs(similarity)

    # Skip movies whose neighbours all have zero similarity (division by zero).
    predictions = [
        (movie_id, total / sim_sums[movie_id])
        for movie_id, total in scores.items()
        if sim_sums[movie_id] > 0
    ]
    predictions.sort(key=lambda pair: pair[1], reverse=True)

    result = []
    for movie_id, score in predictions[:5]:
        title = movie_lookup.get(movie_id, 'Unknown')
        poster = get_movie_poster(title)
        result.append((title, round(float(score), 2), poster))
    return result

def item_cf_recommend(movie_title):
    """Return the five movies rated most similarly to *movie_title*.

    The title is matched case-insensitively (after stripping whitespace)
    against ``reverse_movie_lookup``; neighbours come from the movie's column
    of ``item_similarity_df``.

    Args:
        movie_title: Title of the reference movie.

    Returns:
        A list of ``(title, poster_url)`` tuples, most similar first, or an
        error string: ``"Movie not found."`` for an unknown title and
        ``"No similarity data available."`` when the movie has no column in
        the similarity matrix.
    """
    normalized = movie_title.lower().strip()
    try:
        target_id = reverse_movie_lookup[normalized]
    except KeyError:
        return "Movie not found."

    if target_id not in item_similarity_df:
        return "No similarity data available."

    # Remove the movie itself before ranking so it cannot be its own neighbour.
    candidates = item_similarity_df[target_id].drop(target_id)
    ranked = candidates.sort_values(ascending=False)
    neighbour_ids = ranked.head(5).index

    neighbour_titles = [movie_lookup.get(mid, 'Unknown') for mid in neighbour_ids]
    return [(name, get_movie_poster(name)) for name in neighbour_titles]

def cb_recommend(movie_title):
    """Recommend up to five movies whose genre profile matches *movie_title*.

    The title is matched case-insensitively (after stripping whitespace)
    against ``movies['title']``; candidates are ranked by cosine similarity of
    their rows in ``movie_genre_matrix``.

    Args:
        movie_title: Title of the reference movie.

    Returns:
        A list of ``(title, poster_url)`` tuples, most similar first, with no
        repeated titles and never the reference title itself, or an error
        string: ``"Movie not found."`` for an unknown title and
        ``"No genre data available."`` when the movie has no row in the genre
        matrix.
    """
    movie_title = movie_title.strip().lower()

    # Compare against a temporary lower-cased Series rather than writing a
    # `title_lower` column into the shared `movies` frame on every request.
    title_matches = movies.loc[movies['title'].str.lower() == movie_title, 'movieId']
    if title_matches.empty:
        return "Movie not found."
    movie_id = title_matches.iloc[0]  # first match wins

    if movie_id not in movie_id_to_index:
        return "No genre data available."

    input_vec = movie_genre_matrix[movie_id_to_index[movie_id]]
    sims = cosine_similarity(input_vec, movie_genre_matrix).flatten()

    seen = set()
    result = []
    for i in sims.argsort()[::-1]:  # highest similarity first
        # Dict lookup (as the other recommenders do) instead of a DataFrame
        # scan per candidate.
        title = movie_lookup.get(unique_movie_ids[i])
        if not isinstance(title, str):
            continue  # missing or non-text title: nothing to show
        if title.lower() == movie_title or title in seen:
            continue
        result.append((title, get_movie_poster(title)))
        seen.add(title)
        if len(result) == 5:
            break

    return result

def format_recommendations(recommendations):
    """Render recommender output as an HTML snippet for the chat window.

    Args:
        recommendations: Either an error/status string, or an iterable of
            ``(title, score, poster)`` or ``(title, poster)`` tuples. An empty
            or otherwise falsy ``poster`` means "no image".

    Returns:
        The string unchanged when *recommendations* is a string; otherwise one
        ``<div>`` per item joined with ``<br>``. Titles, scores and poster URLs
        are HTML-escaped.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    from html import escape

    if isinstance(recommendations, str):
        return recommendations

    poster_style = "width:80px;height:120px;object-fit:cover;margin-right:10px;"
    formatted = []
    for item in recommendations:
        if len(item) == 3:
            title, score, poster = item
            rating_html = f"<br>Predicted rating: {escape(str(score))}"
        else:
            title, poster = item
            rating_html = ""

        # Titles come from the dataset and poster URLs from an external API, so
        # escape both before embedding them in markup (escape() also encodes
        # the single quote that delimits the src attribute).
        text_html = f"<div><b>{escape(str(title))}</b>{rating_html}</div>"
        if poster:
            formatted.append(
                "<div style='display:flex;margin-bottom:10px;'>"
                f"<img src='{escape(str(poster))}' style='{poster_style}'>"
                f"{text_html}</div>"
            )
        else:
            formatted.append(text_html)

    return "<br>".join(formatted)

def respond(message, history):
    """Dispatch a chat message to the matching recommender.

    Matching is case-insensitive and by prefix:

    * ``recommend for user [ID]`` -> user-based collaborative filtering
    * ``similar to [Movie Title]`` -> item-based collaborative filtering
    * ``recommend like [Movie Title]`` -> content-based (genre) filtering

    Args:
        message: Raw text typed by the user.
        history: Chat history supplied by the chat interface; unused.

    Returns:
        HTML produced by ``format_recommendations``, a recommender's error
        string, or a help text listing the commands when nothing matches.
    """
    message = message.lower().strip()

    user_prefix = "recommend for user"
    item_prefix = "similar to"
    content_prefix = "recommend like"

    if message.startswith(user_prefix):
        # Guard only the parse: the ID is expected as the last
        # whitespace-separated token of the message.
        try:
            user_id = int(message.split()[-1])
        except ValueError:
            return "Please provide a valid user ID after 'recommend for user'"
        return format_recommendations(user_cf_recommend(user_id))

    if message.startswith(item_prefix):
        # Slice by the prefix length so the offset cannot drift from the text.
        movie_title = message[len(item_prefix):].strip()
        return format_recommendations(item_cf_recommend(movie_title))

    if message.startswith(content_prefix):
        movie_title = message[len(content_prefix):].strip()
        return format_recommendations(cb_recommend(movie_title))

    return "Available commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'"

# Chat UI: every user message is passed to `respond`, and the returned string is
# shown as the reply. The examples mirror the three commands `respond` recognises.
demo = gr.ChatInterface(
    respond,
    title="Movie Recommendation Chatbot",
    description="Ask for recommendations using these commands:\n1. 'recommend for user [ID]'\n2. 'similar to [Movie Title]'\n3. 'recommend like [Movie Title]'",
    examples=[
        ["recommend for user 42"],
        ["similar to Toy Story"],
        ["recommend like The Dark Knight"]
    ]
)

# Starts the app as soon as this module is executed (there is no __main__ guard).
demo.launch()