Spaces:

kmrmanish
/

LPI_Course_Recommendation_System

Sleeping

App Files Files Community

LPI_Course_Recommendation_System / app.py

kmrmanish

Update app.py

d7c2b1e over 2 years ago

raw

history blame

2.71 kB

	import streamlit as st
	import difflib
	import pandas as pd
	import numpy as np


	# for text data preprocessing
	import re
	import nltk
	nltk.download('stopwords')
	from nltk.corpus import stopwords
	from nltk.stem.porter import PorterStemmer
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity


	# Load the course catalogue; the path is relative to the working directory.
	lpi_df = pd.read_csv('Learning Pathway Index.csv')

	# Replace the verbose spreadsheet headers with identifier-friendly names.
	# The "Difficulty level" header really does contain an embedded newline.
	lpi_df.rename(columns={"Course / Learning material": "Course_Learning_Material",
	                       "Course Level": "Course_Level",
	                       "Type (Free or Paid)": "Type",
	                       "Module / Sub-module \nDifficulty level": "Difficulty_Level",
	                       "Keywords / Tags / Skills / Interests / Categories": "Keywords"
	                       }, inplace=True)

	# Columns whose text is joined (in this order) into one document per course.
	feature_columns = [
	    'Course_Learning_Material',
	    'Source',
	    'Course_Level',
	    'Type',
	    'Module',
	    'Difficulty_Level',
	    'Keywords',
	]

	# A single missing cell makes `str + NaN` evaluate to NaN for the whole row,
	# and the regex in stemming() then fails on that float. Fill the gaps with
	# empty strings first so every row yields a real string.
	feature_text = lpi_df[feature_columns].fillna('').astype(str)
	lpi_df['combined_features'] = feature_text[feature_columns[0]].str.cat(
	    feature_text[feature_columns[1:]], sep=' '
	)

	combined_features = lpi_df['combined_features']

	porter_stemmer = PorterStemmer()

	# Fetch the stop-word list once instead of once per token: the original
	# called stopwords.words('english') inside the comprehension and tested
	# membership against the returned list. A frozenset gives O(1) lookups.
	_ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))

	# Anything that is not an ASCII letter is treated as a separator.
	_NON_LETTERS = re.compile(r'[^a-zA-Z]')


	def stemming(content):
	    """Return *content* reduced to space-separated Porter stems.

	    Non-letter characters are replaced by spaces, the text is lower-cased
	    and split on whitespace, English stop words are dropped, and each
	    remaining word is stemmed with the module-level ``porter_stemmer``.

	    Args:
	        content: The raw text to normalise (must be a ``str``).

	    Returns:
	        A single string of stems joined by one space; empty if no word
	        survives the filtering.
	    """
	    words = _NON_LETTERS.sub(' ', content).lower().split()
	    return ' '.join(
	        porter_stemmer.stem(word)
	        for word in words
	        if word not in _ENGLISH_STOP_WORDS
	    )

	# Normalise each course's combined text (letters only, lower-case,
	# stop words removed, stemmed) before vectorising it.
	combined_features = combined_features.apply(stemming)

	# Learn the TF-IDF vocabulary from the corpus and vectorise that same
	# corpus in a single call.
	vectorizer = TfidfVectorizer()
	combined_features = vectorizer.fit_transform(combined_features)

	# Square matrix of pairwise cosine similarities between all course rows.
	similarity = cosine_similarity(combined_features)




	st.title('Course Recommendation App')

	user_input = st.text_input('Enter What You Want to Learn : ')

	# The original counter started at 1 and printed while it was below 30,
	# i.e. at most 29 entries; keep that limit but stop iterating once reached.
	MAX_RECOMMENDATIONS = 29

	if user_input:
	    # Kept in row order so a list position doubles as a row position in
	    # the similarity matrix.
	    list_of_all_titles = lpi_df['Module'].tolist()

	    # difflib can only compare strings; skip missing (NaN) module names
	    # when searching, without disturbing the positions in the full list.
	    matchable_titles = [title for title in list_of_all_titles if isinstance(title, str)]
	    find_close_match = difflib.get_close_matches(user_input, matchable_titles)

	    if find_close_match:
	        close_match = find_close_match[0]

	        # First row carrying the matched module name. A positional lookup
	        # is used because `similarity` is indexed by row position, not by
	        # DataFrame index label.
	        index_of_the_course = list_of_all_titles.index(close_match)

	        # Rank every course by similarity to the matched row (best first);
	        # sorted() is stable, so ties keep their original row order.
	        similarity_score = enumerate(similarity[index_of_the_course])
	        sorted_similar_course = sorted(similarity_score, key=lambda pair: pair[1], reverse=True)
	        top_courses = sorted_similar_course[:MAX_RECOMMENDATIONS]

	        st.write('Courses suggested for you :')

	        # Titles come straight from the list by position, avoiding a
	        # full boolean-mask DataFrame filter for every printed entry.
	        for rank, (position, _score) in enumerate(top_courses, start=1):
	            st.write(f"{rank}. {list_of_all_titles[position]}")

	        # Defensive: only reachable if the similarity row were empty.
	        if not top_courses:
	            st.write('No close matches found.')
	    else:
	        st.write('No close matches found.')