"""Gradio demo that recommends movies whose overview is closest to a text query.

At start-up the script loads a sentence-embedding model, downloads a movie
CSV, keeps the first ``MAX_MOVIES`` rows that have a string ``Overview`` and
embeds those overviews. Each query is then embedded with the same model and
ranked against the stored embeddings by cosine similarity.
"""

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the model.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Load the dataset from the link.
url = "https://huggingface.co/datasets/Pablinho/movies-dataset/resolve/main/9000plus.csv"
print("Loading dataset...")
dataset = pd.read_csv(url)

# Make sure the required columns exist. An explicit raise is used instead of
# ``assert`` because assertions are stripped when Python runs with -O.
missing_columns = [name for name in ("Title", "Overview") if name not in dataset.columns]
if missing_columns:
    raise ValueError(f"Dataset is missing required column(s): {missing_columns}")

# Drop rows whose Overview is missing or is not a string.
dataset = dataset.dropna(subset=["Overview"])
dataset = dataset[dataset["Overview"].apply(lambda x: isinstance(x, str))]

# Limit to 500 movies. ``.copy()`` detaches the slice from the original frame
# so that adding the "embeddings" column below writes to an independent frame.
MAX_MOVIES = 500
dataset = dataset.head(MAX_MOVIES).copy()

print(f"Encoding {len(dataset)} movie descriptions...")
# Encode every overview in one batched call and keep the resulting matrix so
# it does not have to be rebuilt from the DataFrame on every request.
_embedding_matrix = np.asarray(model.encode(dataset["Overview"].tolist()))
# Keep the per-row "embeddings" column (plain Python lists) that the
# DataFrame has always exposed.
dataset["embeddings"] = pd.Series(_embedding_matrix.tolist(), index=dataset.index)
print("Done encoding!")


def recommend_similar_movies(input_text, top_n=5):
    """Return the movies whose overviews are most similar to ``input_text``.

    Args:
        input_text: Free-text description of the kind of movie wanted.
        top_n: Maximum number of movies to return. Values of zero or less
            yield an empty result.

    Returns:
        One string with an entry per movie (title line followed by the
        overview), entries separated by blank lines and ordered from most to
        least similar. A short prompt is returned when ``input_text`` is
        empty or only whitespace.
    """
    if not input_text or not input_text.strip():
        return "Please describe a movie to get recommendations."

    # A negative top_n would otherwise slice from the end of the ranking.
    top_n = max(int(top_n), 0)

    input_embedding = model.encode([input_text])
    similarities = cosine_similarity(input_embedding, _embedding_matrix)[0]

    # argsort is ascending, so reverse it to rank the best matches first.
    top_indices = similarities.argsort()[::-1][:top_n]
    results = dataset.iloc[top_indices][["Title", "Overview"]]

    return "\n\n".join(
        f"🎬 **{title}**\n{overview}"
        for title, overview in zip(results["Title"], results["Overview"])
    )


# NOTE(review): the result string contains Markdown bold markers but is shown
# in a plain "text" output, where they appear literally — confirm whether a
# Markdown output component was intended.
demo = gr.Interface(
    fn=recommend_similar_movies,
    inputs=gr.Textbox(lines=2, placeholder="Describe a movie..."),
    outputs="text",
    title="Movie Recommender",
    description="Get movie recommendations based on your description. Powered by sentence-transformers and cosine similarity.",
)

if __name__ == "__main__":
    demo.launch()