"""Streamlit app for summarizing and question-answering uploaded documents.

The sidebar selects one of two modes:

* "Summarizer" - summarizes an uploaded text file, or one page of an uploaded
  PDF, with the ``facebook/bart-large-cnn`` summarization pipeline.
* "Que/Ans"    - chat-style question answering over the uploaded document
  with the ``deepset/roberta-base-squad2`` question-answering pipeline.
"""

import streamlit as st
# NOTE: the star import stays ahead of the imports below (its original
# position) so that nothing it re-exports can shadow them.
# Text_Cleaning and Copy_Text are not defined in this file - presumably they
# come from here.
from functions import *  # noqa: F401,F403
from transformers import pipeline
from pdfminer.high_level import extract_text
import os
import PyPDF2
import base64

# Directory uploaded PDFs are written to before being displayed.
UPLOAD_DIR = "data"
# Default summary length as a fraction of the document's word count.
SUMMARY_LENGTH_RATIO = 0.3

# page settings
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)


@st.cache_resource(show_spinner=False)
def _load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns."""
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


@st.cache_resource(show_spinner=False)
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


#### chatbot function
def Chat_Bot(text_input, Best_size, max_answer_length):
    """Render a chat interface that answers questions about ``text_input``.

    The conversation is kept in ``st.session_state.messages`` as a list of
    ``{"role", "content"}`` dicts and replayed on every rerun.

    Args:
        text_input: Context passage answers are extracted from. When it is
            empty no question box is shown.
        Best_size: Forwarded to the pipeline call as ``n_best_size``.
        max_answer_length: Forwarded to the pipeline call as
            ``max_answer_len``.
    """
    # NOTE(review): the markup body is blank in this file - presumably chat
    # styling was meant to be injected here; confirm against the original.
    st.markdown(""" """, unsafe_allow_html=True)

    # Cached: loading the model on every rerun is slow.
    qa_pipeline = _load_qa_pipeline()

    # Initialize session state for chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    context = text_input

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if context:
        user_input = st.chat_input("💬 Ask a question based on the context:")
        if user_input:
            with st.chat_message("user"):
                st.markdown(user_input)
            st.session_state.messages.append(
                {"role": "user", "content": user_input}
            )

            with st.spinner("🤔 Thinking..."):
                # NOTE(review): `n_best_size` is not among the documented
                # call parameters of the question-answering pipeline
                # (`top_k` is) - confirm that it has any effect.
                response = qa_pipeline(
                    {"question": user_input, "context": context},
                    max_answer_len=max_answer_length,
                    n_best_size=Best_size,
                )
            answer = response["answer"]

            with st.chat_message("assistant"):
                st.markdown(f"{answer}")
            st.session_state.messages.append(
                {"role": "assistant", "content": f"{answer}"}
            )

    # Clear chat history button. It is rendered whenever there is history,
    # not only on the run in which a question was submitted; otherwise the
    # rerun triggered by the click would never reach this line and the click
    # would be lost.
    if st.session_state.messages and st.button("🗑️ Clear Chat"):
        st.session_state.messages = []
        st.rerun()


### insert external css
def insert_css(css_file: str):
    """Inject the contents of ``css_file`` into the page as a ``<style>`` tag.

    Raises:
        OSError: If ``css_file`` cannot be opened.
    """
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


# app settings css
insert_css("css_files/app.css")


def extract_pdf_text(pdf_file):
    """Extracts text from a PDF file."""
    return extract_text(pdf_file)


def _save_uploaded_file(uploaded_file):
    """Write the upload into ``UPLOAD_DIR`` and return the saved path."""
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    # The file name is supplied by the client; basename() drops any directory
    # components so the write cannot land outside UPLOAD_DIR.
    saved_path = os.path.join(UPLOAD_DIR, os.path.basename(uploaded_file.name))
    with open(saved_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    st.toast("file uploaded: {}".format(uploaded_file.name))
    return saved_path


def _render_pdf(pdf_path):
    """Embed the PDF stored at ``pdf_path`` in the page as a base64 iframe."""
    with open(pdf_path, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode("utf-8")
    pdf_display = (
        f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
        'width="100%" height="800" type="application/pdf"></iframe>'
    )
    st.markdown(pdf_display, unsafe_allow_html=True)


#### displaying uploaded pdf file
def display_pdf_file(uploaded_file):
    """Save the uploaded PDF to disk and display it on screen.

    Failures are reported with ``st.warning`` instead of being raised, so a
    broken preview does not take the rest of the page down.
    """
    try:
        ### save and display file
        _render_pdf(_save_uploaded_file(uploaded_file))
    except Exception as e:  # UI boundary: report and carry on
        # st.warning takes a single body argument, so the exception text is
        # formatted into the message.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")


# --- PDF Page Text Extractor Function ---
def extract_text_from_pdf(pdf_file, page_num):
    """Extract the text of one page of a PDF.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts.
        page_num: 1-based page number.

    Returns:
        ``(text, total_pages)``. ``text`` is ``None`` when ``page_num`` is
        out of range. On any error the message is shown with ``st.error`` and
        ``(None, 0)`` is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        total_pages = len(reader.pages)
        if not 1 <= page_num <= total_pages:
            return None, total_pages
        page = reader.pages[page_num - 1]  # Adjusting for 0-based index
        return page.extract_text(), total_pages
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return None, 0


def pdf_Summarizer(file):
    """Show the PDF and a page picker; return the text of the chosen page.

    Renders two tabs: one displaying the PDF, one showing the page count and
    a page-number input.

    Returns:
        The extracted text of the selected page, or ``None`` when the PDF has
        no pages or the text could not be extracted.
    """
    Display_col, Summarizer_col = st.tabs(["Pdf Display", "PDF Summarizer"])

    with Display_col:
        display_pdf_file(file)

    with Summarizer_col:
        total_pages = len(PyPDF2.PdfReader(file).pages)
        st.write(f"### Total Pages: {total_pages}")

        if total_pages < 1:
            # number_input cannot be built with max_value below min_value.
            st.warning("The PDF has no pages.", icon="⚠️")
            return None

        ## columns
        Input_col = st.columns([4, 10])
        with Input_col[0]:
            page_number = st.number_input(
                "Select page number",
                min_value=1,
                max_value=total_pages,
                value=1,
                step=1,
            )
            st.write("Page Number {}".format(page_number))

        text, _ = extract_text_from_pdf(file, page_number)

    return text


app_sidebar = st.sidebar
with app_sidebar:
    select_mode = st.selectbox(
        label="Select Mode",
        options=["Summarizer", "Que/Ans"],
        key="mode selector",
        index=0,
    )

    if select_mode == "Que/Ans":
        st.write("### Que/Ans Settings")
        max_answer_length = st.slider(
            label="Max answer",
            min_value=1,
            max_value=10,
            key="max answer",
            value=4,
        )
        # The slider works in steps of ten.
        max_answer_length = max_answer_length * 10
        Best_size_ = st.slider(
            label="n best size",
            min_value=1,
            max_value=10,
            key="best size",
            value=5,
        )


def Summarizer_Model(context, Max_Length):
    """Summarize ``context`` with the BART summarization pipeline.

    Args:
        context: Text to summarize.
        Max_Length: Passed as ``min_length``; ``max_length`` is set to
            ``Max_Length + 20``.

    Returns:
        The summary text, or ``None`` when summarization failed (the error is
        shown with ``st.warning``).
    """
    try:
        summarizer = _load_summarizer()
        Summary = summarizer(
            context,
            max_length=Max_Length + 20,
            min_length=Max_Length,
            do_sample=False,
        )
        return Summary[0]['summary_text']
    except Exception as e:
        st.warning(f"Error...\n{e}", icon="⚠️")
        return None


def _read_text_upload(uploaded_file):
    """Return the uploaded text file decoded as UTF-8.

    ``getvalue()`` is used rather than ``read()`` so the result does not
    depend on the current stream position (assumes the upload object is
    BytesIO-like, as Streamlit documents). Undecodable bytes are replaced
    instead of aborting the page.
    """
    return uploaded_file.getvalue().decode("utf-8", errors="replace")


def _summary_length_slider(text, label, key=None):
    """Render the summary-length slider for ``text``.

    The slider ranges from 1 to the word count of ``text`` and defaults to
    ``SUMMARY_LENGTH_RATIO`` of it, clamped into that range so short texts do
    not produce an out-of-range default.

    Returns:
        The selected length, or ``None`` (after showing a notice) when the
        text has fewer than two words and no valid range exists.
    """
    word_count = len(text.split())
    if word_count < 2:
        st.info("Not enough text to summarize.")
        return None
    default_length = max(1, int(word_count * SUMMARY_LENGTH_RATIO))
    return st.slider(
        label=label,
        min_value=1,
        max_value=word_count,
        value=default_length,
        key=key,
    )


app_col = st.columns([2, 8, 2])
with app_col[1]:
    if select_mode == "Summarizer":
        st.write("## 📑 Document Summarizer")
    elif select_mode == "Que/Ans":
        st.write("## 📑 Document Question Answering")

### question answering
que_col = st.columns([2, 8, 2])
with que_col[1]:
    if select_mode == "Que/Ans":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader",
        )

        if File_input is not None:
            if File_input.type == "text/plain":
                text = _read_text_upload(File_input)
            else:
                text = extract_pdf_text(File_input)

            Chat_Bot(
                text_input=Text_Cleaning(text),
                Best_size=Best_size_,
                max_answer_length=max_answer_length,
            )

# session state
if 'input_text' not in st.session_state:
    st.session_state.input_text = []
if 'pdf_text' not in st.session_state:
    st.session_state.pdf_text = []
if 'summary_text' not in st.session_state:
    st.session_state.summary_text = []

summ_col = st.columns([2, 8, 2])
with summ_col[1]:
    if select_mode == "Summarizer":
        ## input file
        File_input = st.file_uploader(
            label="Drop Your File hear",
            type=["txt", "pdf"],
            key="file uploader",
        )

        if File_input is not None:
            if File_input.type == "text/plain":
                text = _read_text_upload(File_input)
                st.session_state.input_text = st.text_area(
                    label="Uploaded document Text",
                    value=Text_Cleaning(text),
                    height=200,
                )
                Text_input = Text_Cleaning(st.session_state.input_text)

                max_length = _summary_length_slider(
                    st.session_state.input_text, label="Max Length"
                )

                if max_length is not None and st.button(label="📄 Generate Summary"):
                    with st.spinner("Generating Summary"):
                        Generated_Summary = Summarizer_Model(
                            context=Text_input, Max_Length=max_length
                        )
                    # Summarizer_Model returns None after reporting a failure.
                    if Generated_Summary:
                        st.write(Generated_Summary)
                        Copy_Text(Generated_Summary)
            else:
                st.session_state.summary_text = []
                # pdf_Summarizer may return None; keep the state a string so
                # the cleaning / text-area code below always gets text.
                st.session_state.pdf_text = pdf_Summarizer(File_input) or ""

                ## text area
                Text_Area_Input = st.text_area(
                    "Pdf Text",
                    value=Text_Cleaning(st.session_state.pdf_text),
                    key="text area",
                    height=450,
                )

                Max_Pdf_Summary_len = _summary_length_slider(
                    Text_Area_Input,
                    label="MAx Length",
                    key="pdf summarizer Slider",
                )

                if Max_Pdf_Summary_len is not None and st.button(
                    "📑 Generate Summary", key="pdf Summary"
                ):
                    # generating summary
                    with st.spinner("Generating Summary"):
                        st.session_state.summary_text = Summarizer_Model(
                            context=Text_Area_Input,
                            Max_Length=Max_Pdf_Summary_len,
                        )
                    if st.session_state.summary_text:
                        st.write(st.session_state.summary_text)
                        Copy_Text(st.session_state.summary_text)