# Bloom's Taxonomy classifier — Streamlit app.
import streamlit as st
import pickle
import pandas as pd

import nltk

# Fetch the NLTK data packages the app needs at runtime.
# BUGFIX: the original called nltk.download('WordNetLemmatizer'), which is
# not a downloadable NLTK resource and silently fails. WordNetLemmatizer
# needs the 'wordnet' corpus (and 'omw-1.4' on NLTK >= 3.6.6).
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

from nltk.corpus import stopwords
from nltk.tokenize import punkt          # NOTE(review): unused module import, kept for compatibility
from nltk.corpus.reader import wordnet   # NOTE(review): unused, kept for compatibility
from nltk.stem import WordNetLemmatizer
def main():
    """Streamlit app that predicts the Bloom's Taxonomy level of a question.

    Loads a fitted TF-IDF vectorizer and six pickled classifiers, runs all
    of them on the user's question, and reports the prediction of the most
    confident model together with every model's maximum class probability.
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")

    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # Fitted TF-IDF vectorizer produced at training time.
        path_tfidf = "tfidf.pickle"
        with open(path_tfidf, 'rb') as data:
            tfidf = pickle.load(data)

        # Label encoding used when the models were trained.
        category_codes = {
            'BT1 - Knowledge': 0,
            'BT2 - Comprehension': 1,
            'BT3 - Application': 2,
            'BT4 - Analysis': 3,
            'BT5 - Evaluation': 4,
            'BT6 - Creation': 5
        }

        punctuation_signs = list("?:!.,;")
        stop_words = list(stopwords.words('english'))

        def create_features_from_text(text):
            """Clean, lemmatize and TF-IDF-vectorize a single question string."""
            lemmatized_text_list = []
            df = pd.DataFrame(columns=['Questions'])
            df.loc[0] = text
            # Normalize line breaks, collapse runs of whitespace, drop quotes.
            # BUGFIX: the original `.str.replace(" ", " ")` was a no-op.
            df['Questions_Parsed_1'] = df['Questions'].str.replace("\r", " ")
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace("\n", " ")
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace(r"\s+", " ", regex=True)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace('"', '')
            df['Questions_Parsed_2'] = df['Questions_Parsed_1'].str.lower()
            df['Questions_Parsed_3'] = df['Questions_Parsed_2']
            for punct_sign in punctuation_signs:
                # regex=False: several signs (?, ., ,) are regex metacharacters
                # and must be removed literally.
                df['Questions_Parsed_3'] = df['Questions_Parsed_3'].str.replace(
                    punct_sign, '', regex=False)
            df['Questions_Parsed_4'] = df['Questions_Parsed_3'].str.replace("'s", "")
            # Verb-position lemmatization, matching the training pipeline.
            wordnet_lemmatizer = WordNetLemmatizer()
            lemmatized_list = [
                wordnet_lemmatizer.lemmatize(word, pos="v")
                for word in df.loc[0]['Questions_Parsed_4'].split(" ")
            ]
            lemmatized_text_list.append(" ".join(lemmatized_list))
            df['Questions_Parsed_5'] = lemmatized_text_list
            df['Questions_Parsed_6'] = df['Questions_Parsed_5']
            for stop_word in stop_words:
                # Word-boundary regex so e.g. "is" is not stripped from "this".
                regex_stopword = r"\b" + stop_word + r"\b"
                df['Questions_Parsed_6'] = df['Questions_Parsed_6'].str.replace(
                    regex_stopword, '', regex=True)
            # TF-IDF
            features = tfidf.transform(df['Questions_Parsed_6']).toarray()
            return features

        def get_category_name(category_id):
            """Map a numeric class id back to its Bloom's level name."""
            for category, id_ in category_codes.items():
                if id_ == category_id:
                    return category

        def predict_from_text(text):
            """Classify *text* with all six models.

            Returns a tuple: (category of the most confident model, its max
            probability, then the max probabilities of lr, mnb, gbc, rfc,
            knn and svc — in that order — as percentages).
            """
            def load_model(path):
                # Helper: unpickle one fitted classifier from disk.
                with open(path, 'rb') as data:
                    return pickle.load(data)

            models = {
                'lr': load_model('best_lrc.pickle'),
                'mnb': load_model('best_mnbc.pickle'),
                'gbc': load_model('best_gbc.pickle'),
                'rfc': load_model('best_rfc.pickle'),
                'knn': load_model('best_knnc.pickle'),
                'svc': load_model('best_svc.pickle'),
            }

            # Vectorize once instead of once per predict/predict_proba call
            # (the original recomputed the features 12 times).
            features = create_features_from_text(text)

            categories = {}
            max_probas = {}
            for key, model in models.items():
                prediction = model.predict(features)[0]
                # BUGFIX: the original read the rfc and knn probabilities
                # from the SVC model; each model now reports its own.
                proba = model.predict_proba(features)[0]
                categories[key] = get_category_name(prediction)
                max_probas[key] = proba.max() * 100

            # Most confident model wins.
            best = max(max_probas, key=max_probas.get)
            return (categories[best], max_probas[best],
                    max_probas['lr'], max_probas['mnb'], max_probas['gbc'],
                    max_probas['rfc'], max_probas['knn'], max_probas['svc'])

        st.info("Prediction with Various Models")
        bt_text = st.text_area("Question to Predict", "Type Here")
        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))
            prediction = predict_from_text(bt_text)
            st.success("Blooms Taxonomy Level :: {}".format(prediction[0]))
            st.success("Maximum Probability :: {}".format(prediction[1]))
            st.write("Performance of Various Algorithms")
            data = pd.DataFrame({
                'Various Algorithm': ['Logistic Regression',
                                      'Multinomial Naive Bayes',
                                      'Gradient Boosting Classifier',
                                      'Random Forest Classifier',
                                      'k-Nearest Neighbors',
                                      'Support Vector Machine'],
                'Maximum Accuracy': [prediction[2], prediction[3],
                                     prediction[4], prediction[5],
                                     prediction[6], prediction[7]],
            }).set_index('Various Algorithm')
            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This is used for classification of Bloom's Taxonomy Levels.")

    # Hide Streamlit's default hamburger menu and footer.
    hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()