Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import pipeline | |
| from PyPDF2 import PdfReader | |
| import nltk | |
| import pandas as pd | |
# Streamlit re-executes this script on every user interaction, so only invoke
# the NLTK downloader when the Punkt sentence-tokenizer data is not already
# installed locally.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Page heading.
st.title(body='7 - Question Generation')
def get_pdf_text(pdf_docs) -> str:
    """Return the text of every page of every PDF in *pdf_docs*, concatenated.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader`` (this
            script passes the list returned by ``st.file_uploader``).
            ``None`` or an empty collection yields an empty string.

    Returns:
        The extracted page texts joined in document/page order, with no
        separator inserted between pages.
    """
    if not pdf_docs:
        return ""

    page_texts = []
    for pdf in pdf_docs:
        # `or ""` keeps a page whose extraction produces nothing (None or an
        # empty string) from breaking the concatenation.
        page_texts.extend(
            page.extract_text() or "" for page in PdfReader(pdf).pages
        )
    # A single join avoids rebuilding the string once per page.
    return "".join(page_texts)
| ######################################################## | |
# Marker removed from the start of each generated string. The original code
# dropped the first 10 characters unconditionally; 10 == len('question: ').
# NOTE(review): presumably the model prefixes its output this way — confirm.
_QUESTION_PREFIX = 'question: '


@st.cache_resource
def _load_question_pipeline():
    """Build the text2text-generation pipeline once and reuse it across reruns."""
    return pipeline(
        task='text2text-generation',
        model='ramsrigouthamg/t5_squad_v1',
    )


def _strip_question_prefix(generated_text):
    """Return *generated_text* without a leading 'question: ' marker, if present."""
    if generated_text.startswith(_QUESTION_PREFIX):
        return generated_text[len(_QUESTION_PREFIX):]
    return generated_text


st.subheader(body='Proposition 1', divider='orange')
if st.toggle(label='Show Proposition 1'):
    st.title('Question Generator from PDFs')
    if st.checkbox('Show Caption'):
        st.caption('Hugging Face Model used: ramsrigouthamg/t5_squad_v1')

    # Collect the uploaded PDFs and split their combined text into sentences.
    uploaded_files = st.file_uploader(label='Upload', accept_multiple_files=True)
    raw_text = get_pdf_text(uploaded_files)
    sentences = nltk.sent_tokenize(text=raw_text)

    # With nothing uploaded (or no extractable text) there are no sentences;
    # skip loading and invoking the model in that case.
    if sentences:
        pipeline_output = _load_question_pipeline()(sentences)
    else:
        pipeline_output = []

    # One generated question per input sentence.
    questions = [
        _strip_question_prefix(item['generated_text'])
        for item in pipeline_output
    ]

    # Optional views of the intermediate and final results.
    if st.toggle('Show Text'):
        st.write(raw_text)
    if st.toggle(label='Show Questions'):
        st.subheader("*Generated Questions are:*")
        for question in questions:
            st.write(f':blue[{question}]')
    if st.toggle(label='Show Pipeline Output'):
        st.write(pipeline_output)
    if st.toggle(label='Show Questions list'):
        st.write(questions)

    # Offer the questions as a single-column CSV download when any exist.
    if questions:
        df = pd.DataFrame(questions, columns=["Question"])
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Questions as CSV",
            data=csv,
            file_name='questions.csv',
            mime='text/csv',
        )