Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ from torch_geometric.data import Data
|
|
| 6 |
from torch_geometric.nn import GATConv
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
| 9 |
# Define the GATConv model architecture
|
| 10 |
class ModeratelySimplifiedGATConvModel(torch.nn.Module):
|
| 11 |
def __init__(self, in_channels, hidden_channels, out_channels):
|
|
@@ -60,33 +61,42 @@ def get_similar_and_recommend(input_text):
|
|
| 60 |
input_embedding = model_bert.encode([input_text])[0]
|
| 61 |
similarities = cosine_similarity([input_embedding], embeddings_matrix)[0]
|
| 62 |
|
| 63 |
-
|
| 64 |
-
user_keywords = input_text.split() # Create a list of keywords from user input
|
| 65 |
-
weight = 1.0 # Initial weight factor
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
weighted_similarities = similarities * weight # Apply the weight to the similarity score
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
|
|
|
| 77 |
|
| 78 |
-
# Recommend the top 10 videos based on GNN embeddings and dot product
|
| 79 |
-
def recommend_next_10_videos(given_video_index, all_video_embeddings):
    """Return up to 10 rows of ``df`` ranked by embedding dot product.

    Every row of ``all_video_embeddings`` is scored by its dot product with
    the row at ``given_video_index``. The reference row's own score is
    replaced with ``-inf`` so that it sorts last.

    Args:
        given_video_index: Row index of the reference video.
        all_video_embeddings: Row-indexable embeddings exposing ``shape[0]``
            (presumably a 2-D torch tensor -- confirm against the caller).

    Returns:
        A list of ``df.iloc[idx].to_dict()`` dicts, highest score first.
    """
    scores = []
    for row in range(all_video_embeddings.shape[0]):
        scores.append(
            torch.dot(all_video_embeddings[given_video_index], all_video_embeddings[row])
        )

    # Push the reference video to the bottom of the ranking.
    scores[given_video_index] = -float("inf")

    ascending = np.argsort(scores)
    descending = ascending[::-1]
    best_ten = descending[:10]
    return [df.iloc[pick].to_dict() for pick in best_ten]
|
| 88 |
|
| 89 |
-
top_10_recommended_videos_features = recommend_next_10_videos(
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Exclude unwanted features for recommended videos
|
| 92 |
for recommended_video in top_10_recommended_videos_features:
|
|
@@ -95,11 +105,11 @@ def get_similar_and_recommend(input_text):
|
|
| 95 |
if "embeddings" in recommended_video:
|
| 96 |
del recommended_video["embeddings"]
|
| 97 |
|
| 98 |
-
# Create the output JSON with
|
| 99 |
output = {
|
| 100 |
"search_context": {
|
| 101 |
"input_text": input_text,
|
| 102 |
-
"weight": weight, #
|
| 103 |
},
|
| 104 |
"most_similar_video": most_similar_video_features,
|
| 105 |
"top_10_recommended_videos": top_10_recommended_videos_features,
|
|
@@ -107,13 +117,13 @@ def get_similar_and_recommend(input_text):
|
|
| 107 |
|
| 108 |
return output
|
| 109 |
|
| 110 |
-
# Update the Gradio interface to output JSON with
|
| 111 |
interface = gr.Interface(
|
| 112 |
fn=get_similar_and_recommend,
|
| 113 |
inputs=gr.Textbox(label="Enter Text to Find Most Similar Video"),
|
| 114 |
outputs=gr.JSON(),
|
| 115 |
title="Video Recommendation System with GNN-based Recommendations",
|
| 116 |
-
description="Enter text to find the most similar video and get top 10 recommended videos with search context
|
| 117 |
)
|
| 118 |
|
| 119 |
interface.launch()
|
|
|
|
| 6 |
from torch_geometric.nn import GATConv
|
| 7 |
from sentence_transformers import SentenceTransformer
|
| 8 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 9 |
+
|
| 10 |
# Define the GATConv model architecture
|
| 11 |
class ModeratelySimplifiedGATConvModel(torch.nn.Module):
|
| 12 |
def __init__(self, in_channels, hidden_channels, out_channels):
|
|
|
|
| 61 |
input_embedding = model_bert.encode([input_text])[0]
|
| 62 |
similarities = cosine_similarity([input_embedding], embeddings_matrix)[0]
|
| 63 |
|
| 64 |
+
# Pick the row with the highest cosine similarity to the query.
# The unweighted scores are used for the most similar video.
most_similar_index = np.argmax(similarities)

# Get all features of the most similar video (looked up once; the
# duplicated lookup of the same row was redundant).
most_similar_video_features = df.iloc[most_similar_index].to_dict()

# Remove the embedding-related keys before this dict is placed in the output.
# pop(..., None) is a no-op when a key is absent, matching the old if/del pairs.
for internal_key in ("embeddings", "text_for_embedding"):
    most_similar_video_features.pop(internal_key, None)
|
| 76 |
|
|
|
|
| 77 |
|
| 78 |
+
# Apply search context weight for GNN recommendations
|
| 79 |
+
user_keywords = input_text.split()  # Whitespace-separated keywords from user input
weight = 1.0  # Initial weight factor

# Lower-case the titles once, instead of rebuilding the list for every
# keyword, and keep them in a set for constant-time membership tests.
lowercase_titles = set(df["title"].str.lower().tolist())

# NOTE(review): a keyword only counts when it equals an ENTIRE title
# (case-insensitively), not when it merely occurs inside a title. That
# behaviour is preserved here -- confirm whether substring matching was meant.
for keyword in user_keywords:
    if keyword.lower() in lowercase_titles:
        weight += 0.1  # Increase weight for each match
|
| 85 |
|
| 86 |
+
# Recommend the top 10 videos based on GNN embeddings and weighted dot product
|
| 87 |
+
def recommend_next_10_videos(given_video_index, all_video_embeddings, weight):
    """Return up to 10 rows of ``df`` ranked by weighted embedding dot product.

    Every row of ``all_video_embeddings`` is scored by its dot product with
    the row at ``given_video_index``, multiplied by ``weight``. The reference
    row's own score is replaced with ``-inf`` so that it sorts last.

    Args:
        given_video_index: Row index of the reference video.
        all_video_embeddings: Row-indexable embeddings exposing ``shape[0]``
            (presumably a 2-D torch tensor -- confirm against the caller).
        weight: Scalar applied to every dot product. Because the same factor
            scales all scores, a positive weight does not change the order.

    Returns:
        A list of ``df.iloc[idx].to_dict()`` dicts, highest score first.
    """
    reference = all_video_embeddings[given_video_index]
    # Convert each 0-d result to a plain float so np.argsort receives an
    # ordinary numeric list rather than a mix of tensors and a Python float.
    dot_products = [
        float(torch.dot(reference, all_video_embeddings[i])) * weight
        for i in range(all_video_embeddings.shape[0])
    ]
    dot_products[given_video_index] = -float("inf")

    # Fixed: the slice was written as "[[::-1][:10]", which is a syntax error.
    top_10_indices = np.argsort(dot_products)[::-1][:10]
    return [df.iloc[idx].to_dict() for idx in top_10_indices]
|
| 96 |
|
| 97 |
+
top_10_recommended_videos_features = recommend_next_10_videos(
|
| 98 |
+
most_similar_index, all_video_embeddings, weight
|
| 99 |
+
)
|
| 100 |
|
| 101 |
# Exclude unwanted features for recommended videos
|
| 102 |
for recommended_video in top_10_recommended_videos_features:
|
|
|
|
| 105 |
if "embeddings" in recommended_video:
|
| 106 |
del recommended_video["embeddings"]
|
| 107 |
|
| 108 |
+
# Create the output JSON with the search context
|
| 109 |
output = {
|
| 110 |
"search_context": {
|
| 111 |
"input_text": input_text,
|
| 112 |
+
"weight": weight, # Weight applied to the GNN recommendations
|
| 113 |
},
|
| 114 |
"most_similar_video": most_similar_video_features,
|
| 115 |
"top_10_recommended_videos": top_10_recommended_videos_features,
|
|
|
|
| 117 |
|
| 118 |
return output
|
| 119 |
|
| 120 |
+
# Update the Gradio interface to output JSON with search context for GNN recommendations
|
| 121 |
# Gradio UI: one free-text input, JSON output produced by
# get_similar_and_recommend.
interface = gr.Interface(
    title="Video Recommendation System with GNN-based Recommendations",
    description="Enter text to find the most similar video and get top 10 recommended videos with search context applied to GNN results.",
    inputs=gr.Textbox(label="Enter Text to Find Most Similar Video"),
    outputs=gr.JSON(),
    fn=get_similar_and_recommend,
)

# Start serving the app.
interface.launch()
|