Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
import faiss
|
| 8 |
+
import numpy as np
|
| 9 |
+
|
| 10 |
+
# Load dataset
|
| 11 |
+
# Location of the course catalogue CSV.
# NOTE(review): '/content/...' looks like a notebook-local absolute path —
# confirm the file exists at this location wherever the app is deployed.
DATASET_PATH = '/content/nlp_test_data_final.csv'
df = pd.read_csv(DATASET_PATH)
|
| 12 |
+
|
| 13 |
+
# Loaded sentence-embedding models, keyed by model name, so that each model
# is constructed once per process instead of once per request.
_SENTENCE_MODEL_CACHE = {}


def _get_sentence_model(model_name):
    """Return the SentenceTransformer for *model_name*, loading it at most once."""
    if model_name not in _SENTENCE_MODEL_CACHE:
        _SENTENCE_MODEL_CACHE[model_name] = SentenceTransformer(model_name)
    return _SENTENCE_MODEL_CACHE[model_name]


def recommend_courses(user_skills, user_level, df):
    """Score every course against the user's profile and return the top five.

    Each course is represented by its ``Prerequisites`` and
    ``course_difficulty`` text. Three scores are computed per course:

    * ``similarity_tfidf`` - cosine similarity of TF-IDF vectors,
    * ``similarity_bert`` - cosine similarity of sentence embeddings,
    * ``similarity_faiss`` - ``1 / (1 + d)`` where ``d`` is the distance
      reported by a FAISS ``IndexFlatL2`` search over the same embeddings.

    As a side effect, a bar chart of the best score per method is written to
    ``/content/similarity_comparison.png``.

    Args:
        user_skills: Iterable of skill strings.
        user_level: Difficulty level string; ``None`` is treated as empty.
        df: Course DataFrame with at least the columns ``Prerequisites``,
            ``course_difficulty``, ``course_title``, ``course_organization``
            and ``course_rating``. It is not modified.

    Returns:
        A DataFrame of up to five rows with the course title, organization,
        difficulty, rating and the three similarity scores.
    """
    # Work on a copy so the caller's frame does not accumulate helper columns.
    df = df.copy()

    # Combine prerequisites and difficulty into a single text feature.
    df['combined_features'] = df['Prerequisites'].fillna('') + ', ' + df['course_difficulty'].fillna('')

    # TF-IDF representation of the course texts.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df['combined_features'])

    # Build the user query in the same "skills, level" shape as the courses.
    # A missing level (None) must not break the string concatenation.
    user_input = ', '.join(user_skills) + ', ' + (user_level or '')
    user_vector = vectorizer.transform([user_input])
    similarities_tfidf = cosine_similarity(user_vector, tfidf_matrix).flatten()

    # Sentence embeddings for the courses and the query.
    model = _get_sentence_model('paraphrase-multilingual-MiniLM-L12-v2')
    course_embeddings = model.encode(df['combined_features'].tolist(), convert_to_numpy=True)
    user_embedding = model.encode([user_input], convert_to_numpy=True)
    similarities_bert = cosine_similarity(user_embedding, course_embeddings).flatten()

    # FAISS works on contiguous float32 arrays.
    course_vectors = np.ascontiguousarray(course_embeddings, dtype=np.float32)
    query_vectors = np.ascontiguousarray(user_embedding, dtype=np.float32)
    index = faiss.IndexFlatL2(course_vectors.shape[1])
    index.add(course_vectors)

    # search() returns (distances, indices), both ordered nearest-first.
    # Scatter the converted distances back to row order via the indices so
    # that each score lines up with the course it belongs to.
    distances, neighbours = index.search(query_vectors, len(df))
    similarities_faiss = np.zeros(len(df), dtype=np.float32)
    similarities_faiss[neighbours[0]] = 1.0 / (1.0 + distances[0])

    # Store similarity scores alongside the courses.
    df['similarity_tfidf'] = similarities_tfidf
    df['similarity_bert'] = similarities_bert
    df['similarity_faiss'] = similarities_faiss

    # Order by TF-IDF score first; the BERT and FAISS scores only break ties.
    recommended_courses = df.sort_values(
        by=['similarity_tfidf', 'similarity_bert', 'similarity_faiss'],
        ascending=False,
    ).head(5)

    # Plot the best score achieved by each method. The figure is always
    # closed so repeated requests do not accumulate open figures.
    fig = plt.figure(figsize=(8, 5))
    try:
        plt.bar(
            ['TF-IDF', 'Sentence-BERT', 'FAISS'],
            [similarities_tfidf.max(), similarities_bert.max(), similarities_faiss.max()],
        )
        plt.xlabel('Model')
        plt.ylabel('Max Similarity Score')
        plt.title('Comparison of Similarity Scores Across Models')
        plt.savefig('/content/similarity_comparison.png')
    finally:
        plt.close(fig)

    return recommended_courses[['course_title', 'course_organization', 'course_difficulty', 'course_rating', 'similarity_tfidf', 'similarity_bert', 'similarity_faiss']]
|
| 56 |
+
|
| 57 |
+
def gradio_interface(user_skills, user_level):
    """Gradio callback: parse the skills text and return course recommendations.

    Args:
        user_skills: Comma-separated skills as typed by the user. Whitespace
            around each skill is ignored and empty entries are dropped, so
            ``"python,sql"`` and ``"python , sql,"`` both yield two skills.
            ``None`` is treated as an empty string.
        user_level: Difficulty level chosen in the UI.

    Returns:
        The DataFrame produced by ``recommend_courses`` for the module-level
        course table ``df``.
    """
    # Split on the bare comma and trim, rather than on the exact string ', ',
    # so the result does not depend on how the user spaced the list.
    user_skills_list = [
        skill.strip()
        for skill in (user_skills or '').split(',')
        if skill.strip()
    ]
    return recommend_courses(user_skills_list, user_level, df)
|
| 61 |
+
|
| 62 |
+
# Gradio UI
|
| 63 |
+
# Input widgets: free-text skills plus a difficulty selector.
skills_box = gr.Textbox(label="Enter your skills (comma-separated)")
level_dropdown = gr.Dropdown(["Beginner", "Intermediate", "Mixed"], label="Select Difficulty Level")

# Output widget: one row per recommended course.
results_table = gr.Dataframe(
    headers=["Course Title", "Organization", "Difficulty", "Rating", "TF-IDF Score", "BERT Score", "FAISS Score"],
    label="Recommended Courses",
)

# Wire the widgets to the recommendation callback.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[skills_box, level_dropdown],
    outputs=results_table,
)
|
| 71 |
+
|
| 72 |
+
if __name__ == "__main__":
    # Start the web UI only when executed as a script, not when imported.
    iface.launch()
|