Spaces:
Runtime error
Runtime error
# Third-party dependencies are installed from a shell or requirements.txt,
# not from inside this module (the PyPI names are scikit-learn and
# sentence-transformers):
#   pip install scikit-learn
#   pip install sentence-transformers
import faiss
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Course catalogue shared by every request; the path is resolved relative to
# the process's working directory.
df = pd.read_csv(filepath_or_buffer='nlp_test_data_final.csv')
# Sentence-BERT checkpoint used for the dense-embedding similarity scores.
_SENTENCE_MODEL_NAME = 'paraphrase-multilingual-MiniLM-L12-v2'

# Loaded models keyed by checkpoint name, so the load happens once per process
# instead of once per request.
_sentence_model_cache = {}

# Columns handed back to the caller, in display order.
_RESULT_COLUMNS = [
    'course_title',
    'course_organization',
    'course_difficulty',
    'course_rating',
    'similarity_tfidf',
    'similarity_bert',
    'similarity_faiss',
]


def _get_sentence_model(name=_SENTENCE_MODEL_NAME):
    """Return the SentenceTransformer for *name*, loading it on first use."""
    if name not in _sentence_model_cache:
        _sentence_model_cache[name] = SentenceTransformer(name)
    return _sentence_model_cache[name]


def _faiss_similarities(course_embeddings, user_embedding):
    """Return one L2-based similarity per course, in embedding-row order."""
    index = faiss.IndexFlatL2(course_embeddings.shape[1])
    index.add(course_embeddings)
    # search() returns (distances, indices); both are ordered nearest-first,
    # not in row order.
    distances, indices = index.search(user_embedding, len(course_embeddings))
    scores = np.zeros(len(course_embeddings), dtype=np.float32)
    # Scatter each score back to the row it belongs to so the result lines up
    # with the DataFrame when stored as a column.
    scores[indices[0]] = 1.0 / (1.0 + distances[0])
    return scores


def _save_similarity_plot(max_scores, plot_path):
    """Write a bar chart of the best score per model to *plot_path*."""
    import warnings

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.bar(list(max_scores), list(max_scores.values()))
        ax.set_xlabel('Model')
        ax.set_ylabel('Max Similarity Score')
        ax.set_title('Comparison of Similarity Scores Across Models')
        fig.savefig(plot_path)
    except OSError as exc:
        # The chart is a diagnostic by-product; an unwritable path must not
        # discard recommendations that were already computed.
        warnings.warn(f'Could not save similarity plot to {plot_path!r}: {exc}')
    finally:
        # Figures stay registered with pyplot until closed; close explicitly
        # so repeated calls do not accumulate them.
        plt.close(fig)


def recommend_courses(user_skills, user_level, df,
                      plot_path='/content/similarity_comparison.png'):
    """Return the five courses that best match the user's skills and level.

    Each course is described by its ``Prerequisites`` and
    ``course_difficulty`` text. That text is compared with the user's input
    using three scores: TF-IDF cosine similarity, Sentence-BERT cosine
    similarity, and a FAISS L2 distance converted with ``1 / (1 + distance)``.

    Args:
        user_skills: Iterable of skill strings.
        user_level: Difficulty label appended to the skills; ``None`` is
            treated as an empty string.
        df: Course table with ``Prerequisites`` and ``course_difficulty``
            columns plus the course title, organization and rating columns
            that are returned. It is not modified.
        plot_path: Where to save the bar chart comparing the best score of
            each model. Pass ``None`` to skip the chart.

    Returns:
        A DataFrame of at most five rows with the course title, organization,
        difficulty, rating and the three similarity scores. It is empty when
        *df* has no rows.
    """
    if df.empty:
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    # Work on a copy: the caller's frame is shared between requests and must
    # not pick up the helper/score columns added below.
    df = df.copy()

    # Combine prerequisites and difficulty into a single text feature.
    df['combined_features'] = (
        df['Prerequisites'].fillna('') + ', ' + df['course_difficulty'].fillna('')
    )
    user_input = ', '.join(user_skills) + ', ' + (user_level or '')

    # TF-IDF: fit on the courses, then project the user input into that space.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df['combined_features'])
    user_vector = vectorizer.transform([user_input])
    similarities_tfidf = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # Sentence-BERT embeddings feed both the cosine and the FAISS scores.
    model = _get_sentence_model()
    course_embeddings = model.encode(df['combined_features'].tolist(), convert_to_numpy=True)
    user_embedding = model.encode([user_input], convert_to_numpy=True)
    similarities_bert = cosine_similarity(user_embedding, course_embeddings).flatten()
    similarities_faiss = _faiss_similarities(course_embeddings, user_embedding)

    df['similarity_tfidf'] = similarities_tfidf
    df['similarity_bert'] = similarities_bert
    df['similarity_faiss'] = similarities_faiss

    # Rank by TF-IDF score first; the BERT and FAISS scores only break ties.
    recommended_courses = df.sort_values(
        by=['similarity_tfidf', 'similarity_bert', 'similarity_faiss'],
        ascending=False,
    ).head(5)

    if plot_path is not None:
        _save_similarity_plot(
            {
                'TF-IDF': max(similarities_tfidf),
                'Sentence-BERT': max(similarities_bert),
                'FAISS': max(similarities_faiss),
            },
            plot_path,
        )

    return recommended_courses[_RESULT_COLUMNS]
def gradio_interface(user_skills, user_level):
    """Adapt the Gradio form inputs to ``recommend_courses``.

    Args:
        user_skills: Comma-separated skills as typed by the user.
        user_level: Difficulty label chosen in the dropdown; ``None`` (nothing
            selected) is passed on as an empty string.

    Returns:
        The DataFrame produced by ``recommend_courses`` for the module-level
        course table ``df``.
    """
    # Split on bare commas and trim each piece so "python,sql", "python, sql"
    # and stray trailing commas all parse to the same skill list.
    pieces = (piece.strip() for piece in (user_skills or '').split(','))
    user_skills_list = [piece for piece in pieces if piece]
    return recommend_courses(user_skills_list, user_level or '', df)
# Gradio UI: a free-text skills box and a difficulty selector go in, a table
# of recommended courses comes out.
skills_box = gr.Textbox(label="Enter your skills (comma-separated)")
level_dropdown = gr.Dropdown(
    ["Beginner", "Intermediate", "Mixed"],
    label="Select Difficulty Level",
)
results_table = gr.Dataframe(
    headers=[
        "Course Title",
        "Organization",
        "Difficulty",
        "Rating",
        "TF-IDF Score",
        "BERT Score",
        "FAISS Score",
    ],
    label="Recommended Courses",
)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[skills_box, level_dropdown],
    outputs=results_table,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()