| | from sklearn.metrics.pairwise import cosine_similarity |
| | import numpy as np |
| | import gradio as gr |
| | import pandas as pd |
| | from sentence_transformers import SentenceTransformer |
| |
|
| | |
# Sentence-embedding model shared by the indexing step below and by every query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Movie catalogue: a remote CSV that pandas fetches when this module runs.
url = "https://huggingface.co/datasets/Pablinho/movies-dataset/resolve/main/9000plus.csv"
print("Loading dataset...")
dataset = pd.read_csv(url)

# Validate the schema with a real exception: `assert` statements are removed
# when Python runs with -O, which would let a malformed CSV through and fail
# later with a far less helpful KeyError.
missing_columns = [name for name in ("Title", "Overview") if name not in dataset.columns]
if missing_columns:
    raise ValueError(f"Dataset is missing required column(s): {missing_columns}")

# Drop rows with no title or overview. pandas reads empty cells as float NaN,
# which is not text the encoder should be given and would be shown to the user
# as the literal string "nan".
dataset = dataset.dropna(subset=["Title", "Overview"]).reset_index(drop=True)

print("Encoding movie descriptions...")
# Encode every overview in a single batched call rather than one model call
# per row; the model receives a plain list of strings.
overview_embeddings = model.encode(dataset["Overview"].astype(str).tolist())
# Preserve the original column layout: one plain Python list of floats per row.
dataset["embeddings"] = pd.Series(overview_embeddings.tolist(), index=dataset.index)
print("Done encoding!")
| |
|
# Stacked 2-D array of the stored movie embeddings (one row per dataset row).
# Built lazily on the first query so it is not rebuilt for every request.
_EMBEDDING_MATRIX = None


def _embedding_matrix():
    """Return the stacked movie embeddings, building the array on first use."""
    global _EMBEDDING_MATRIX
    if _EMBEDDING_MATRIX is None:
        _EMBEDDING_MATRIX = np.vstack(dataset["embeddings"].to_numpy())
    return _EMBEDDING_MATRIX


def recommend_similar_movies(input_text, top_n=5):
    """Return the movies whose overviews are most similar to ``input_text``.

    The query is embedded with the module-level ``model`` and compared to the
    stored ``dataset["embeddings"]`` using cosine similarity.

    Args:
        input_text: Free-text description of the kind of movie wanted.
        top_n: Maximum number of movies to return. Values below zero are
            treated as zero.

    Returns:
        A string with one entry per recommended movie (bold title followed by
        its overview), entries separated by a blank line and ordered from most
        to least similar. A short prompt is returned when ``input_text`` is
        empty or only whitespace.
    """
    if not input_text or not input_text.strip():
        return "Please describe a movie to get recommendations."

    # A negative value would make the slice below drop items from the *end*
    # of the ranking and return almost the whole catalogue.
    top_n = max(int(top_n), 0)

    input_embedding = model.encode([input_text])
    similarities = cosine_similarity(input_embedding, _embedding_matrix())[0]

    # argsort is ascending, so reverse it to rank the best matches first.
    top_indices = similarities.argsort()[::-1][:top_n]
    results = dataset.iloc[top_indices][["Title", "Overview"]]

    return "\n\n".join(
        f"🎬 **{title}**\n{overview}"
        for title, overview in zip(results["Title"], results["Overview"])
    )
| |
|
# Gradio front end: a single free-text box wired to recommend_similar_movies.
# NOTE(review): the function's result contains Markdown emphasis markers
# ("**"); confirm that a plain "text" output is what is wanted here.
demo = gr.Interface(
    title="Movie Recommender",
    description="Get movie recommendations based on your description. Powered by sentence-transformers and cosine similarity.",
    fn=recommend_similar_movies,
    inputs=gr.Textbox(placeholder="Describe a movie...", lines=2),
    outputs="text",
)

# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
| |
|