# Author: tiya1012
# Last commit: "Update app.py" (0c9165b, verified)
# Install the third-party dependencies from a shell before running this app.
# (pip commands are not Python statements and cannot live in a module.)
#   pip install scikit-learn
#   pip install sentence-transformers
import functools
import os

import faiss
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Course catalogue used by the recommender; read once when the module loads.
DATASET_PATH = 'nlp_test_data_final.csv'
df = pd.read_csv(DATASET_PATH)
@functools.lru_cache(maxsize=1)
def _load_sentence_model(model_name):
    """Load a Sentence-BERT model once and reuse it on later calls."""
    return SentenceTransformer(model_name)


def _faiss_similarities(query_embedding, corpus_embeddings):
    """Return the query's L2-based similarity to every corpus row, in row order."""
    corpus = np.ascontiguousarray(corpus_embeddings, dtype=np.float32)
    query = np.ascontiguousarray(query_embedding, dtype=np.float32)

    index = faiss.IndexFlatL2(corpus.shape[1])
    index.add(corpus)

    # search() returns (distances, row_ids), both ordered nearest-first.
    distances, row_ids = index.search(query, corpus.shape[0])
    distances, row_ids = distances[0], row_ids[0]

    # Scatter the scores back to their original row positions so they line
    # up with the DataFrame; -1 ids mark "no result" slots and are skipped.
    found = row_ids >= 0
    similarities = np.zeros(corpus.shape[0], dtype=np.float32)
    similarities[row_ids[found]] = 1.0 / (1.0 + distances[found])
    return similarities


def _save_similarity_plot(max_scores, path):
    """Save a bar chart of each model's best similarity score to *path*."""
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.bar(list(max_scores), list(max_scores.values()))
        ax.set_xlabel('Model')
        ax.set_ylabel('Max Similarity Score')
        ax.set_title('Comparison of Similarity Scores Across Models')
        fig.savefig(path)
    finally:
        # Close explicitly: pyplot keeps every figure alive until closed,
        # which would leak one figure per request.
        plt.close(fig)


def recommend_courses(user_skills, user_level, df,
                      plot_path='/content/similarity_comparison.png'):
    """Return the five courses most similar to the user's skills and level.

    Each course is described by its ``Prerequisites`` and
    ``course_difficulty`` text. The user's query is scored against every
    course with three methods: TF-IDF cosine similarity, Sentence-BERT
    cosine similarity, and a FAISS L2 search converted to ``1 / (1 + d)``.

    Args:
        user_skills: Iterable of skill strings.
        user_level: Difficulty label appended to the query; ``None`` is
            treated as an empty string.
        df: Course table. Must contain the columns ``Prerequisites``,
            ``course_difficulty``, ``course_title``, ``course_organization``
            and ``course_rating``. It is not modified.
        plot_path: Where to save the bar chart comparing the best score of
            each method. Pass ``None`` to skip plotting.

    Returns:
        A DataFrame of at most five rows with the course title,
        organization, difficulty, rating and the three similarity scores.
        Rows are ordered by TF-IDF score, with ties broken by the BERT
        score and then the FAISS score (all descending).
    """
    # Work on a copy so the caller's (shared, module-level) frame is never
    # mutated by a request.
    courses = df.copy()

    # Combine prerequisites and difficulty into a single text feature.
    courses['combined_features'] = (
        courses['Prerequisites'].fillna('')
        + ', '
        + courses['course_difficulty'].fillna('')
    )
    course_texts = courses['combined_features'].tolist()

    # Express the user's query in the same "skills, level" shape.
    user_input = ', '.join(user_skills) + ', ' + (user_level or '')

    # TF-IDF cosine similarity.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(course_texts)
    user_vector = vectorizer.transform([user_input])
    similarities_tfidf = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # Sentence-BERT embeddings; the model is cached across calls.
    model = _load_sentence_model('paraphrase-multilingual-MiniLM-L12-v2')
    course_embeddings = model.encode(course_texts, convert_to_numpy=True)
    user_embedding = model.encode([user_input], convert_to_numpy=True)
    similarities_bert = cosine_similarity(user_embedding, course_embeddings).flatten()

    # FAISS L2 search, mapped back to row order.
    similarities_faiss = _faiss_similarities(user_embedding, course_embeddings)

    courses['similarity_tfidf'] = similarities_tfidf
    courses['similarity_bert'] = similarities_bert
    courses['similarity_faiss'] = similarities_faiss

    # Multi-key sort: TF-IDF is the primary key, the others only break ties.
    recommended_courses = courses.sort_values(
        by=['similarity_tfidf', 'similarity_bert', 'similarity_faiss'],
        ascending=False,
    ).head(5)

    if plot_path is not None:
        _save_similarity_plot(
            {
                'TF-IDF': float(similarities_tfidf.max()),
                'Sentence-BERT': float(similarities_bert.max()),
                'FAISS': float(similarities_faiss.max()),
            },
            plot_path,
        )

    return recommended_courses[[
        'course_title',
        'course_organization',
        'course_difficulty',
        'course_rating',
        'similarity_tfidf',
        'similarity_bert',
        'similarity_faiss',
    ]]
def gradio_interface(user_skills, user_level):
    """Adapt the raw Gradio widget values to ``recommend_courses``.

    Args:
        user_skills: Comma-separated skills as typed by the user. ``None``
            or an empty string yields an empty skill list.
        user_level: Selected difficulty label. ``None`` (nothing selected)
            is passed on as an empty string.

    Returns:
        Whatever ``recommend_courses`` returns for the parsed skills, the
        level and the module-level ``df``.
    """
    # Split on bare commas and trim each piece, so "python,sql" and
    # "python, sql" parse the same; blank entries are dropped.
    user_skills_list = [
        skill.strip()
        for skill in (user_skills or '').split(',')
        if skill.strip()
    ]
    return recommend_courses(user_skills_list, user_level or '', df)
# Gradio UI
# Input widgets: a free-text skills box and a difficulty selector.
_skills_input = gr.Textbox(label="Enter your skills (comma-separated)")
_level_input = gr.Dropdown(
    ["Beginner", "Intermediate", "Mixed"],
    label="Select Difficulty Level",
)

# Output widget: a table with one header per column shown to the user.
_results_output = gr.Dataframe(
    headers=[
        "Course Title",
        "Organization",
        "Difficulty",
        "Rating",
        "TF-IDF Score",
        "BERT Score",
        "FAISS Score",
    ],
    label="Recommended Courses",
)

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[_skills_input, _level_input],
    outputs=_results_output,
)

if __name__ == "__main__":
    # Launch only when run as a script, not when the module is imported.
    iface.launch()