Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from textwrap3 import wrap | |
| from flashtext import KeywordProcessor | |
| import torch, random, nltk, string, traceback, sys, os, requests, datetime | |
| import numpy as np | |
| import pandas as pd | |
| from transformers import T5ForConditionalGeneration,T5Tokenizer | |
| import pke | |
| from helper import postprocesstext, summarizer, get_nouns_multipartite, get_keywords,\ | |
| get_question, get_related_word, get_final_option_list, load_raw_text | |
def set_seed(seed: int):
    """Seed every random number generator used by this module.

    ``seed`` is applied, in order, to Python's ``random`` module, NumPy's
    global generator, PyTorch's default generator and all CUDA devices.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Seed all generators once, when the script is executed.
set_seed(42)
def _cache_across_reruns(func):
    """Wrap ``func`` in Streamlit's resource cache when the API is available.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached loader would rebuild both T5 models on each rerun.  The modern
    ``st.cache_resource`` is preferred, then the older
    ``st.experimental_singleton``; if neither exists the function is returned
    undecorated, which preserves the previous (uncached) behaviour.
    """
    for api_name in ("cache_resource", "experimental_singleton"):
        decorator = getattr(st, api_name, None)
        if decorator is not None:
            # show_spinner=False keeps the page free of an extra status widget.
            return decorator(show_spinner=False)(func)
    return func


@_cache_across_reruns
def load_model():
    """Download the NLTK resources and load the two T5 models.

    The model identifiers are read from the ``summary_mod_name`` and
    ``question_mod_name`` environment variables.  Both models are moved to
    CUDA when it is available, otherwise they stay on the CPU.

    Returns:
        A 4-tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)``.  Note that for the question pair the tokenizer
        comes *before* the model.

    Raises:
        KeyError: if either environment variable is not set.
    """
    # Each resource is requested once (the previous code asked for 'wordnet' twice).
    for resource in ("punkt", "brown", "wordnet", "stopwords", "omw-1.4"):
        nltk.download(resource)

    summary_mod_name = os.environ["summary_mod_name"]
    question_mod_name = os.environ["question_mod_name"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model
| from nltk.corpus import wordnet as wn | |
| from nltk.tokenize import sent_tokenize | |
| from nltk.corpus import stopwords | |
def csv_downloader(df):
    """Render a Streamlit download button that offers ``df`` as a file.

    The frame is serialised without its index, using a tab as the field
    separator, and encoded as UTF-8 before it is handed to
    ``st.download_button``.
    """
    payload = df.to_csv(sep="\t", index=False)
    button_options = {
        "label": "Download logs data as CSV separated by tab",
        "data": payload.encode('utf-8'),
        "file_name": 'df_quiz_log_file_v1.csv',
        "mime": 'text/csv',
    }
    st.download_button(**button_options)
def load_file():
    """Load text from an uploaded ``.txt`` file.

    Shows a Streamlit file uploader restricted to ``txt`` files.

    Returns:
        The uploaded file's content decoded as UTF-8, or ``None`` when no
        file has been uploaded or its reported MIME type is not
        ``text/plain``.
    """
    uploaded_file = st.file_uploader("Upload Files", type=['txt'])
    # Explicit guard: every "nothing usable" path returns None instead of
    # relying on an implicit fall-through or an unbound local variable.
    if uploaded_file is None or uploaded_file.type != "text/plain":
        return None
    return str(uploaded_file.read(), "utf-8")
import html  # stdlib; escapes generated text before it is rendered as raw HTML

st.markdown('')

# Load the summarisation and question-generation models.
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("Exam Assistant")
st.write("Upload text, Get ready for answering autogenerated questions")

st.text("Disclaimer: This app stores user's input for model improvement purposes !!")

# Input text; the text area is pre-filled with whatever load_raw_text() returns.
# load_file() is an alternative, upload-based input source.
default_text = load_raw_text()
raw_text = st.text_area("Enter text here", default_text, height=250, max_chars=1000000, )

start_time = str(datetime.datetime.now())

if raw_text:
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)

    questions = []
    option1, option2, option3, option4 = [], [], [], []
    option_columns = (option1, option2, option3, option4)

    for idx, ans in enumerate(ans_list):
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)

        # Exactly four options are logged per answer; indexing (rather than
        # zip) keeps the failure loud if fewer than four are returned.
        for position, column in enumerate(option_columns):
            column.append(final_options[position])

        # Only display a question the first time it is generated.
        if ques not in questions:
            # The question and options derive from user-supplied text and are
            # rendered with unsafe_allow_html=True, so they must be escaped.
            html_parts = [
                f"<div><p>{idx + 1}: <b> {html.escape(str(ques))} </b></p></div>"
            ]
            for position in range(len(option_columns)):
                # The correct option is highlighted in green.
                style = ' style="color:Green;"' if position == ans_index else ""
                option_text = html.escape(str(final_options[position]))
                html_parts.append(f"<p{style}><b> {option_text} </b></p>")
            st.markdown("\n".join(html_parts), unsafe_allow_html=True)
            st.markdown("-----")

        # Recorded for every answer (duplicates included) so that all log
        # columns below have the same length as ans_list.
        questions.append(ques)

    output_path = "results/df_quiz_log_file_v1.csv"
    # Appending fails if the directory is missing, so create it on first use.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    res_df = pd.DataFrame({
        "TimeStamp": [start_time] * len(ans_list),
        "Input": [str(raw_text)] * len(ans_list),
        "Question": questions,
        "Option1": option1,
        "Option2": option2,
        "Option3": option3,
        "Option4": option4,
        "Correct Answer": ans_list,
    })
    # Write the header only when the log file is being created.
    res_df.to_csv(output_path, mode='a', index=False, sep="\t",
                  header=not os.path.exists(output_path))

    # Offer the accumulated log (all runs so far) for download.
    csv_downloader(pd.read_csv(output_path, sep="\t"))