"""Gradio app that recommends courses matching a user's skills and level.

Three similarity signals are computed between the user's input and each
course's prerequisites + difficulty text: TF-IDF cosine similarity,
Sentence-BERT cosine similarity, and a FAISS L2-distance-based score.
"""

# Install the dependencies from a shell, not from inside this file
# (``pip install ...`` is not valid Python):
#   pip install pandas gradio scikit-learn sentence-transformers faiss-cpu matplotlib

from functools import lru_cache

import faiss
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

MODEL_NAME = 'paraphrase-multilingual-MiniLM-L12-v2'
PLOT_PATH = '/content/similarity_comparison.png'
OUTPUT_COLUMNS = [
    'course_title',
    'course_organization',
    'course_difficulty',
    'course_rating',
    'similarity_tfidf',
    'similarity_bert',
    'similarity_faiss',
]

# Load dataset
df = pd.read_csv('nlp_test_data_final.csv')


@lru_cache(maxsize=1)
def _get_model():
    """Return the sentence-embedding model, loading it only on first use."""
    return SentenceTransformer(MODEL_NAME)


def _save_comparison_plot(best_scores):
    """Save a bar chart of the best score per model to ``PLOT_PATH``.

    Args:
        best_scores: Mapping of model label to its maximum similarity score.
    """
    # Use explicit figure/axes objects rather than pyplot's implicit
    # "current figure", and always close the figure so repeated requests
    # do not accumulate open figures.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.bar(list(best_scores.keys()), list(best_scores.values()))
        ax.set_xlabel('Model')
        ax.set_ylabel('Max Similarity Score')
        ax.set_title('Comparison of Similarity Scores Across Models')
        fig.savefig(PLOT_PATH)
    finally:
        plt.close(fig)


def recommend_courses(user_skills, user_level, df):
    """Return the five courses that best match the user's skills and level.

    Args:
        user_skills: Iterable of skill strings.
        user_level: Difficulty level string; ``None`` is treated as empty.
        df: Course table with at least the columns ``Prerequisites``,
            ``course_difficulty``, ``course_title``, ``course_organization``
            and ``course_rating``. The frame is not modified.

    Returns:
        A DataFrame with the columns listed in ``OUTPUT_COLUMNS`` holding up
        to five rows, ordered by TF-IDF score (descending) with the BERT and
        FAISS scores used as successive tie-breakers. An empty input frame
        yields an empty result.

    Side effects:
        Writes a bar chart comparing the best score of each model to
        ``PLOT_PATH``.
    """
    if df.empty:
        return pd.DataFrame(columns=OUTPUT_COLUMNS)

    # Work on a copy: the caller passes a shared module-level frame, and
    # adding score columns to it in place would leak state between requests.
    scored = df.copy()

    # Combine prerequisites and difficulty into a single text feature.
    scored['combined_features'] = (
        scored['Prerequisites'].fillna('')
        + ', '
        + scored['course_difficulty'].fillna('')
    )
    course_texts = scored['combined_features'].tolist()

    # Express the user input in the same textual form as the course feature.
    user_input = ', '.join(user_skills) + ', ' + (user_level or '')

    # TF-IDF cosine similarity.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(course_texts)
    user_vector = vectorizer.transform([user_input])
    similarities_tfidf = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # Sentence-BERT embeddings; FAISS expects contiguous float32 arrays.
    model = _get_model()
    course_embeddings = np.ascontiguousarray(
        model.encode(course_texts, convert_to_numpy=True), dtype=np.float32
    )
    user_embedding = np.ascontiguousarray(
        model.encode([user_input], convert_to_numpy=True), dtype=np.float32
    )
    similarities_bert = cosine_similarity(
        user_embedding, course_embeddings
    ).flatten()

    # FAISS exact L2 search. ``search`` returns (distances, indices), both
    # ordered nearest-first, so the distances must be scattered back to the
    # original row positions through the indices before being stored.
    index = faiss.IndexFlatL2(course_embeddings.shape[1])
    index.add(course_embeddings)
    distances, neighbours = index.search(user_embedding, len(scored))
    similarities_faiss = np.zeros(len(scored), dtype=np.float32)
    # Convert distances to similarity scores in (0, 1].
    similarities_faiss[neighbours[0]] = 1.0 / (1.0 + distances[0])

    # Store similarity scores.
    scored['similarity_tfidf'] = similarities_tfidf
    scored['similarity_bert'] = similarities_bert
    scored['similarity_faiss'] = similarities_faiss

    # Lexicographic sort: TF-IDF first, then BERT, then FAISS as tie-breakers.
    recommended_courses = scored.sort_values(
        by=['similarity_tfidf', 'similarity_bert', 'similarity_faiss'],
        ascending=False,
    ).head(5)

    # Plot the best score achieved by each model.
    _save_comparison_plot(
        {
            'TF-IDF': float(similarities_tfidf.max()),
            'Sentence-BERT': float(similarities_bert.max()),
            'FAISS': float(similarities_faiss.max()),
        }
    )

    return recommended_courses[OUTPUT_COLUMNS]


def gradio_interface(user_skills, user_level):
    """Adapt the raw Gradio inputs to ``recommend_courses``.

    Args:
        user_skills: Comma-separated skills as typed by the user.
        user_level: Difficulty level chosen in the dropdown (may be ``None``
            when nothing is selected).

    Returns:
        The DataFrame of recommended courses.
    """
    # Split on bare commas and strip whitespace so "a,b" and "a, b" are
    # treated alike, and blank entries are dropped.
    user_skills_list = [
        skill.strip()
        for skill in (user_skills or '').split(',')
        if skill.strip()
    ]
    return recommend_courses(user_skills_list, user_level, df)


# Gradio UI
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Enter your skills (comma-separated)"),
        gr.Dropdown(
            ["Beginner", "Intermediate", "Mixed"],
            label="Select Difficulty Level",
        ),
    ],
    outputs=gr.Dataframe(
        headers=[
            "Course Title",
            "Organization",
            "Difficulty",
            "Rating",
            "TF-IDF Score",
            "BERT Score",
            "FAISS Score",
        ],
        label="Recommended Courses",
    ),
)

if __name__ == "__main__":
    iface.launch()