Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import streamlit as st | |
| from pprint import pprint | |
| import subprocess | |
# Make sure the spaCy English model is available before the question-generation
# imports below run.
import importlib.util
import sys

# Use the interpreter that is running this app so the model is installed into
# the same environment; a bare "python" may resolve to a different one on PATH.
cmd = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
# Streamlit re-executes this script on every interaction, so only download
# when the model package cannot be found yet.
if importlib.util.find_spec("en_core_web_sm") is None:
    subprocess.run(cmd)
| from spacy.cli import download | |
| from Questgen import main, main2 | |
| from PyPDF2 import PdfReader | |
| from transformers import pipeline | |
| from PyPDF2 import PdfReader | |
| import nltk | |
| import pandas as pd | |
# Fetch the 'punkt' tokenizer data only when it is not already present, so the
# script does not contact the NLTK downloader on every Streamlit rerun.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
| # st.title(body='7 - Question Generation') | |
def get_pdf_text(pdf_docs) -> str:
    """Concatenate the extracted text of every page of every PDF in *pdf_docs*.

    Args:
        pdf_docs: Iterable of objects that ``PdfReader`` can open (this script
            passes the files returned by ``st.file_uploader``). ``None`` or an
            empty iterable yields an empty string.

    Returns:
        The page texts joined in document order, with no separator added.
    """
    chunks = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page may yield a falsy result (e.g. None); treat it as empty
            # text so a single page cannot break the concatenation.
            chunks.append(page.extract_text() or "")
    return "".join(chunks)
########################################################
# Streamlit page: upload PDFs, generate multiple-choice questions from their
# text, display them, and offer the result as a CSV download.
st.title('Generate Questions from PDFs')
file = st.file_uploader(label='Upload', accept_multiple_files=True)
pr = st.button(label='Process')

if pr:
    if not file:
        # Nothing uploaded: do not run the model on empty text.
        st.warning('Please upload at least one PDF before processing.')
    else:
        raw_text = get_pdf_text(file)
        ge = main2.QGen()
        payload = {
            'input_text': raw_text,
            # 'max_questions': 2,
        }
        # st.button is only True on the run triggered by the click, and the
        # download button / toggle below each trigger a rerun. Keep the result
        # in session state so it survives those reruns.
        st.session_state['mcq_output'] = ge.predict_mcq(payload=payload)

if 'mcq_output' in st.session_state:
    output = st.session_state['mcq_output']
    # The generator may hand back an empty result; look the key up defensively
    # instead of subscripting before any check.
    questions = output.get('questions') if output else None

    if not questions:
        st.warning('No questions could be generated from the uploaded text.')
    else:
        st.header(body='*Generated Questions are:*', divider='orange')
        for question in questions:
            st.subheader(
                body=f":orange[Q{question['id']}:] {question['question_statement']}",
                divider='blue',
            )
            # The correct answer is always listed as choice A; up to three
            # distractors follow as B, C and D (any further options are not shown).
            st.markdown(f"A: {question['answer']}")
            for letter, option in zip('BCD', question['options']):
                st.markdown(f"{letter}: {option}")

        # Flatten the questions into a table and drop the bookkeeping columns
        # before exporting; tolerate columns that are absent from the result.
        df = pd.DataFrame(questions)
        df = df.drop(
            columns=['options_algorithm', 'extra_options', 'context', 'question_type'],
            errors='ignore',
        )
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label='Download Data',
            data=csv,
            file_name='Generated MCQs.csv',
            mime='text/csv',
        )

    if st.toggle(label='Show Raw Output'):
        st.write(output)