Spaces:

nishant43s
/

GenAi-Summarizer

Running

App Files Files Community

GenAi-Summarizer / data /document.py

nishant43s

Upload 14 files

088848a verified 8 months ago

raw

history blame contribute delete

10.2 kB

	import streamlit as st
	from functions import *
	from transformers import pipeline
	from pdfminer.high_level import extract_text
	import os
	import PyPDF2
	import base64



	#### chatbot function

	@st.cache_resource
	def _load_qa_pipeline():
	    """Create the question-answering pipeline once and reuse it across reruns.

	    Streamlit re-executes the script on every widget interaction; without
	    caching, the model would be instantiated again on each of those runs.
	    """
	    return pipeline("question-answering", model="deepset/roberta-base-squad2")


	def Chat_Bot(text_input,Best_size,max_answer_length):
	    """Render a chat interface that answers questions about ``text_input``.

	    The conversation is kept in ``st.session_state.messages`` as a list of
	    ``{"role": ..., "content": ...}`` dicts and replayed on every run.

	    Args:
	        text_input: Context text the answers are taken from. When it is
	            empty, no chat input box is shown.
	        Best_size: Forwarded to the pipeline call as ``n_best_size``.
	        max_answer_length: Forwarded to the pipeline call as
	            ``max_answer_len``.
	    """
	    # Pin the chat input box to the bottom of the viewport.
	    st.markdown(
	        """
	<style>
	/* Fix the chat input box at the bottom */
	div[data-testid="stChatInput"] {
	position: fixed;
	bottom: 0;
	margin-bottom: 36px;

	}
	</style>
	""",
	        unsafe_allow_html=True
	    )

	    # Cached loader: the model is built on first use only.
	    qa_pipeline = _load_qa_pipeline()

	    # Initialize session state for chat history
	    if "messages" not in st.session_state:
	        st.session_state.messages = []

	    context = text_input

	    # Replay the conversation so far.
	    for message in st.session_state.messages:
	        with st.chat_message(message["role"]):
	            st.markdown(message["content"])

	    if context:
	        user_input = st.chat_input("💬 Ask a question based on the context:")
	        if user_input:
	            with st.chat_message("user"):
	                st.markdown(user_input)
	            st.session_state.messages.append({"role": "user", "content": user_input})

	            with st.spinner("🤔 Thinking..."):
	                # NOTE(review): `n_best_size` is passed through unchanged from
	                # the original call; confirm the installed transformers
	                # version actually honours this keyword.
	                response = qa_pipeline(
	                    {"question": user_input, "context": context},
	                    max_answer_len=max_answer_length, n_best_size=Best_size
	                )
	                answer = response["answer"]

	            with st.chat_message("assistant"):
	                st.markdown(f"{answer}")
	            st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})

	    # Clear chat history button
	    if st.button("🗑️ Clear Chat"):
	        st.session_state.messages = []
	        st.rerun()


	# page settings
	# Wide layout with the sidebar hidden until the user opens it.
	_PAGE_OPTIONS = {
	    "initial_sidebar_state": "collapsed",
	    "layout": "wide",
	}
	st.set_page_config(**_PAGE_OPTIONS)

	### insert external css
	def insert_css(css_file:str) -> None:
	    """Inject the contents of a CSS file into the page inside a ``<style>`` tag.

	    Args:
	        css_file: Path of the stylesheet to read.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    # Pin the encoding so the stylesheet is read the same way regardless of
	    # the platform's default locale encoding.
	    with open(css_file, encoding="utf-8") as f:
	        css = f.read()
	    st.markdown(f"<style>{css}</style>",unsafe_allow_html=True)

	# app settings css
	insert_css("css_files/app.css")

	def extract_pdf_text(pdf_file):
	    """Return the text of ``pdf_file`` as produced by pdfminer's ``extract_text``."""
	    document_text = extract_text(pdf_file)
	    return document_text



	#### displaying uploaded pdf file
	def _save_uploaded_file(save_file, target_dir="data"):
	    """Write the uploaded file's bytes into ``target_dir`` and return the saved path."""
	    os.makedirs(target_dir, exist_ok=True)
	    # The name comes from the client; keep only its final path component so
	    # the file cannot be written outside ``target_dir``.
	    saved_path = os.path.join(target_dir, os.path.basename(save_file.name))
	    with open(saved_path,"wb") as f:
	        f.write(save_file.getbuffer())
	    st.toast("file uploaded: {}".format(save_file.name))
	    return saved_path


	def _render_pdf(pdf_file):
	    """Embed the PDF stored at ``pdf_file`` in the page as a base64 data-URL iframe."""
	    with open(pdf_file,"rb") as f:
	        base64_pdf = base64.b64encode(f.read()).decode("utf-8")

	    pdf_display = f"""
	<iframe
	src="data:application/pdf;base64,{base64_pdf}"
	width="950" height="1000"
	type="application/pdf"
	>
	</iframe>
	"""

	    st.markdown(pdf_display,unsafe_allow_html=True)


	def display_pdf_file(uploaded_file):
	    """Save an uploaded PDF under ``data/`` and display it on screen.

	    Any failure while saving or rendering is reported to the user as a
	    warning instead of propagating.

	    Args:
	        uploaded_file: Upload object exposing ``name`` and ``getbuffer()``.
	    """
	    try:
	        ### save and display file
	        pdf_file = _save_uploaded_file(uploaded_file)
	        _render_pdf(pdf_file)
	    except Exception as e:
	        # st.warning accepts a single body argument, so the error text is
	        # formatted into the message rather than passed as a second positional.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")


	# --- PDF Page Text Extractor Function ---
	def extract_text_from_pdf(pdf_file, page_num):
	    """Extract the text of a single page from a PDF.

	    Args:
	        pdf_file: PDF source handed to ``PyPDF2.PdfReader``.
	        page_num: 1-based page number to read.

	    Returns:
	        ``(text, total_pages)``. ``text`` is ``None`` when ``page_num`` is out
	        of range; on any error an error box is shown and ``(None, 0)`` is
	        returned.
	    """
	    try:
	        pdf_reader = PyPDF2.PdfReader(pdf_file)
	        page_count = len(pdf_reader.pages)

	        # Guard clause: reject page numbers outside 1..page_count.
	        if not (1 <= page_num <= page_count):
	            return None, page_count

	        # Page numbers are 1-based for the user, 0-based in the reader.
	        selected_page = pdf_reader.pages[page_num - 1]
	        return selected_page.extract_text(), page_count
	    except Exception as e:
	        st.error(f"Error extracting text: {e}")
	        return None, 0

	def pdf_Summarizer(file):
	    """Show the uploaded PDF and return the text of a user-selected page.

	    Two tabs are rendered: one displaying the document, one with a page
	    picker. The text of the chosen page is returned (``None`` when
	    ``extract_text_from_pdf`` could not produce any).
	    """
	    display_tab, summarizer_tab = st.tabs(["Pdf Display","PDF Summarizer"])

	    with display_tab:
	        display_pdf_file(file)

	    with summarizer_tab:
	        # Page count bounds the page picker below.
	        page_count = len(PyPDF2.PdfReader(file).pages)
	        st.write(f"### Total Pages: {page_count}")

	        ## columns
	        picker_columns = st.columns([4,10])
	        with picker_columns[0]:
	            page_number = st.number_input(
	                "Select page number",
	                min_value=1, max_value=page_count,
	                value=1, step=1)
	            st.write("Page Number {}".format(page_number))

	        page_text, _ = extract_text_from_pdf(file, page_number)

	    return page_text


	app_sidebar = st.sidebar

	with app_sidebar:
	    # Mode switch; the rest of the page branches on this value.
	    select_mode = st.selectbox(
	        label="Select Mode",
	        options=["Summarizer","Que/Ans"],
	        index=0,
	        key="mode selector"
	    )

	    # The tuning sliders are rendered only in Que/Ans mode, so
	    # `max_answer_length` and `Best_size_` are defined only in that mode.
	    if select_mode == "Que/Ans":
	        st.write("### Que/Ans Settings")

	        answer_length_steps = st.slider(
	            label="Max answer",
	            key="max answer",
	            min_value=1,
	            max_value=10,
	            value=4
	        )
	        # The slider value is scaled by ten before it is used.
	        max_answer_length = answer_length_steps*10

	        Best_size_ = st.slider(
	            label="n best size",
	            key="best size",
	            min_value=1,
	            max_value=10,
	            value=5
	        )

	@st.cache_resource
	def _load_summarizer():
	    """Create the summarization pipeline once and reuse it across reruns and calls."""
	    return pipeline("summarization", model="facebook/bart-large-cnn")


	def Summarizer_Model(context,Max_Length):
	    """Summarize ``context`` with the ``facebook/bart-large-cnn`` pipeline.

	    Args:
	        context: Text to summarize.
	        Max_Length: Passed as ``min_length``; ``max_length`` is set to
	            ``Max_Length + 20``.

	    Returns:
	        The summary string, or ``None`` if loading the model or generating
	        the summary failed (a warning is shown in that case).
	    """
	    try:
	        # Loading stays inside the try so a failed model load is reported the
	        # same way as a failed generation.
	        summarizer = _load_summarizer()
	        Summary = summarizer(
	            context,
	            max_length=Max_Length+20,
	            min_length=Max_Length,
	            do_sample=False
	        )
	        return Summary[0]['summary_text']

	    except Exception as e:
	        st.warning(f"Error...\n{e}",icon="⚠️")
	        return None

	# Heading shown for each mode; unknown modes render no heading.
	_PAGE_TITLES = {
	    "Summarizer": "## 📑 Document Summarizer",
	    "Que/Ans": "## 📑 Document Question Answering",
	}

	app_col = st.columns([2,8,2])

	with app_col[1]:
	    page_title = _PAGE_TITLES.get(select_mode)
	    if page_title is not None:
	        st.write(page_title)

	### question answering
	que_col = st.columns([2,8,2])

	with que_col[1]:
	    if select_mode == "Que/Ans":
	        ## input file
	        File_input = st.file_uploader(
	            label="Drop Your File hear",
	            type=["txt", "pdf"],
	            key="file uploader"
	        )

	        if File_input is not None:
	            # Plain-text uploads are decoded directly; anything else goes
	            # through the PDF text extractor.
	            if File_input.type == "text/plain":
	                text = File_input.read().decode("utf-8")
	            else:
	                text = extract_pdf_text(File_input)

	            # Both upload kinds feed the same chat interface.
	            Chat_Bot(
	                text_input=Text_Cleaning(text),
	                Best_size=Best_size_,
	                max_answer_length=max_answer_length
	            )

	# session state
	# Seed each session-state slot with its own empty list on first run.
	for _state_key in ('input_text', 'pdf_text', 'summary_text'):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = []

	def _summary_length_slider(word_count, label, key=None):
	    """Render the summary-length slider with bounds that stay valid for any text size.

	    The default is 30% of the word count, clamped into the slider's range so
	    short or empty texts do not yield a default below the minimum or an
	    upper bound below the lower bound.
	    """
	    # Keep the range at least 1..2 so there is always something to select.
	    upper_bound = max(word_count, 2)
	    default_length = min(max(int(word_count * 0.3), 1), upper_bound)
	    return st.slider(
	        label=label,
	        min_value=1,
	        max_value=upper_bound,
	        value=default_length,
	        key=key
	    )


	summ_col = st.columns([2,8,2])

	with summ_col[1]:
	    if select_mode == "Summarizer":
	        ## input file
	        File_input = st.file_uploader(
	            label="Drop Your File hear",
	            type=["txt", "pdf"],
	            key="file uploader"
	        )
	        if File_input is not None:
	            if File_input.type == "text/plain":
	                text = File_input.read().decode("utf-8")
	                # The editable text area is the source of truth for the text
	                # that gets summarized.
	                st.session_state.input_text = st.text_area(label="Uploaded document Text",value=Text_Cleaning(text),height=200)
	                Text_input = Text_Cleaning(st.session_state.input_text)
	                max_length = _summary_length_slider(
	                    len(st.session_state.input_text.split()),
	                    label="Max Length"
	                )

	                if st.button(label="📄 Generate Summary"):
	                    with st.spinner("Generating Summary"):
	                        Generated_Summary = Summarizer_Model(context=Text_input,Max_Length=max_length)

	                    # Summarizer_Model returns None after showing its own
	                    # warning; only render and offer copying for a real summary.
	                    if Generated_Summary:
	                        st.write(Generated_Summary)
	                        Copy_Text(Generated_Summary)

	            else:
	                st.session_state.summary_text = []
	                # pdf_Summarizer returns None when the page text could not be
	                # extracted; fall back to an empty string so the cleaning step
	                # always receives text.
	                st.session_state.pdf_text = pdf_Summarizer(File_input) or ""

	                ## text area
	                Text_Area_Input = st.text_area(
	                    "Pdf Text",value=Text_Cleaning(st.session_state.pdf_text),
	                    key="text area",height=450
	                )

	                Max_Pdf_Summary_len = _summary_length_slider(
	                    len(Text_Area_Input.split()),
	                    label="MAx Length",
	                    key="pdf summarizer Slider"
	                )

	                if st.button("📑 Generate Summary",key="pdf Summary"):
	                    # generating summary
	                    with st.spinner("Generating Summary"):
	                        st.session_state.summary_text = Summarizer_Model(
	                            context=Text_Area_Input,Max_Length=Max_Pdf_Summary_len
	                        )

	                    # Skip rendering when summarization failed (None result).
	                    if st.session_state.summary_text:
	                        st.write(st.session_state.summary_text)
	                        Copy_Text(st.session_state.summary_text)