| import streamlit as st |
| import difflib |
| import pandas as pd |
| import numpy as np |
|
|
|
|
| |
| import re |
| import nltk |
| nltk.download('stopwords') |
| from nltk.corpus import stopwords |
| from nltk.stem.porter import PorterStemmer |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
# Load the course catalogue. The path is relative, so it is resolved against
# the working directory the app is started from.
lpi_df = pd.read_csv('Learning Pathway Index.csv')

# Swap the verbose CSV headers for identifier-friendly names so the columns
# can be reached both as lpi_df['Name'] and as lpi_df.Name.
lpi_df.rename(
    columns={
        "Course / Learning material": "Course_Learning_Material",
        "Course Level": "Course_Level",
        "Type (Free or Paid)": "Type",
        "Module / Sub-module \nDifficulty level": "Difficulty_Level",
        "Keywords / Tags / Skills / Interests / Categories": "Keywords",
    },
    inplace=True,
)

# Columns whose text is merged into one document per row, in this order.
FEATURE_COLUMNS = [
    'Course_Learning_Material',
    'Source',
    'Course_Level',
    'Type',
    'Module',
    'Difficulty_Level',
    'Keywords',
]

# Plain `+` concatenation propagates NaN: a single missing cell would turn the
# whole combined value into NaN, which the text clean-up step cannot process.
# Blank out missing cells and coerce everything to str before joining; rows
# with no missing cells produce exactly the same text as before.
feature_text = lpi_df[FEATURE_COLUMNS].fillna('').astype(str)
lpi_df['combined_features'] = feature_text[FEATURE_COLUMNS[0]].str.cat(
    feature_text[FEATURE_COLUMNS[1:]], sep=' '
)

# Working copy of the per-row documents used by the later processing steps.
combined_features = lpi_df['combined_features']

# One shared stemmer instance, reused for every row.
porter_stemmer = PorterStemmer()
|
|
|
|
def stemming(content):
    """Normalise free text into a space-separated string of Porter stems.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining word is reduced to its stem with the
    module-level ``porter_stemmer``.

    Args:
        content: The raw text to normalise (must be a ``str``).

    Returns:
        The stems joined by single spaces; an empty string when no word
        survives the filtering.
    """
    # Build the stop-word set once per call. Leaving the
    # stopwords.words('english') call inside the comprehension's filter would
    # re-evaluate it for every token and test membership against a list.
    stop_words = set(stopwords.words('english'))

    words = re.sub(r'[^a-zA-Z]', ' ', content).lower().split()
    return ' '.join(
        porter_stemmer.stem(word) for word in words if word not in stop_words
    )
|
|
# Stem every row's text, then learn the TF-IDF vocabulary and vectorise the
# same corpus in a single step. From here on `combined_features` is bound to
# the TF-IDF matrix rather than to the raw text.
vectorizer = TfidfVectorizer()
combined_features = vectorizer.fit_transform(combined_features.apply(stemming))

# Pairwise cosine similarity between all rows: similarity[i][j] scores row i
# against row j.
similarity = cosine_similarity(combined_features)
|
|
|
|
|
|
|
|
# Number of suggestions to display. The original loop condition (`i < 30`
# with the counter starting at 1) showed 29 entries; that count is kept.
MAX_RECOMMENDATIONS = 29

st.title('Course Recommendation App')

user_input = st.text_input('Enter What You Want to Learn : ')

if user_input:
    # difflib can only compare strings, so leave out missing module names.
    list_of_all_titles = lpi_df['Module'].dropna().tolist()
    find_close_match = difflib.get_close_matches(user_input, list_of_all_titles)

    if find_close_match:
        close_match = find_close_match[0]

        # Rows of `similarity` are positional, so locate the first matching
        # row by position instead of by index label.
        is_match = (lpi_df['Module'] == close_match).to_numpy()
        index_of_the_course = int(np.flatnonzero(is_match)[0])

        # Rank every row by its similarity to the matched one, best first.
        # sorted() is stable, so equally scored rows keep their file order.
        sorted_similar_course = sorted(
            enumerate(similarity[index_of_the_course]),
            key=lambda pair: pair[1],
            reverse=True,
        )

        st.write('Courses suggested for you :')

        # Only the displayed slice is visited, and each title is fetched by
        # position rather than by filtering the whole frame once per row.
        module_titles = lpi_df['Module'].to_numpy()
        top_courses = sorted_similar_course[:MAX_RECOMMENDATIONS]
        for rank, (position, _score) in enumerate(top_courses, start=1):
            st.write(f"{rank}. {module_titles[position]}")

        # Defensive guard: the matched row itself is always in the ranking,
        # so this only fires if the ranking is unexpectedly empty.
        if not top_courses:
            st.write('No close matches found.')
    else:
        st.write('No close matches found.')
|
|
|
|