Spaces:

nishant43s
/

GenAi-Summarizer

Running

App Files Files Community

GenAi-Summarizer / app.py

nishant43s

Update app.py

f432ce7 verified 3 months ago

raw

history blame contribute delete

52 kB

	import streamlit as st ### importing liberaries
	from streamlit_extras.colored_header import colored_header
	from streamlit_option_menu import option_menu
	import streamlit.components.v1 as component
	from streamlit_lottie import st_lottie, st_lottie_spinner
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.pipeline import make_pipeline
	from transformers import pipeline
	from transformers import AutoTokenizer , AutoModelForSeq2SeqLM
	from newspaper import Article
	import nltk
	import nltk.downloader
	nltk.download('punkt_tab')
	from nltk.tokenize import word_tokenize
	from cleantext import clean
	from PyPDF2 import PdfReader
	import pdfminer
	from pdfminer.high_level import extract_text
	from pdfminer.high_level import extract_pages
	from pdfminer.layout import LTTextContainer, LTChar, LTTextLine
	import requests
	import json
	import numpy as np
	import pandas as pd
	import random
	import base64
	import lxml
	import lxml_html_clean
	import re
	import os


	###### main app functions

	### insert external css
	def insert_css(css_file:str):
	    """
	    Inject a stylesheet into the page.

	    Reads ``css_file`` and emits its contents wrapped in a ``<style>`` tag
	    through ``st.markdown`` with ``unsafe_allow_html=True``.
	    """
	    # Decode explicitly as UTF-8 so a stylesheet containing non-ASCII characters
	    # loads the same way regardless of the host's default locale encoding.
	    with open(css_file, encoding="utf-8") as f:
	        st.markdown(f"<style>{f.read()}</style>",unsafe_allow_html=True)

	### insert external html file
	def insert_html(html_file):
	    """
	    Return the full text of ``html_file``.

	    The file is decoded as UTF-8 explicitly so markup containing non-ASCII
	    characters does not depend on the host's default locale encoding.
	    """
	    with open(html_file, encoding="utf-8") as f:
	        return f.read()

	### insert lottie animation json files
	def insert_lottie_animation(animation_file:str):
	    """
	    Load a lottie animation description from a JSON file.

	    Returns whatever ``json.load`` produces for ``animation_file``.
	    Errors from opening or parsing the file propagate to the caller.
	    """
	    # JSON interchange is UTF-8; do not rely on the locale default encoding.
	    with open(animation_file, "r", encoding="utf-8") as f:
	        return json.load(f)

	### app tutorial video function
	@st.dialog("App Tutorial")
	def watch_tutorial():
	    """Show the bundled ``app_tutorial.mp4`` video inside the "App Tutorial" dialog."""
	    st.subheader("GenAi Summarizer🤖")
	    # Read through a context manager so the file handle is closed every time the
	    # dialog is opened instead of being left for the garbage collector.
	    with open("app_tutorial.mp4", "rb") as video_file:
	        video_bytes = video_file.read()
	    st.text("")
	    st.video(
	        data=video_bytes,format="video/mp4",
	        loop=True,autoplay=True
	    )


	def download_text(text, filename):
	    """
	    Render a "Download" button that saves ``text`` as a file named ``filename``.

	    The text is embedded in the page as a base64 ``data:`` URI, so no server
	    round trip is needed. ``filename`` is HTML-escaped before it is placed in
	    the ``download`` attribute.
	    """
	    import html  # local import: only needed for the attribute escaping below

	    #### Convert string to bytes
	    b64 = base64.b64encode(text.encode()).decode()

	    # Callers build the file name from a scraped article title; a quote or angle
	    # bracket in it would otherwise break out of the attribute and inject markup.
	    safe_filename = html.escape(str(filename), quote=True)

	    href = f"""
	    <a href="data:application/octet-stream;base64,{b64}" download="{safe_filename}">
	    <button class="neon-button">Download</button>
	    </a>
	    """

	    st.markdown(href, unsafe_allow_html=True)
	    # Styles for the "neon-button" class used above.
	    if __name__=="__main__":
	        insert_css("cssfiles/download-article.css")


	def copy_text(text):
	    """
	    Render a copy-to-clipboard icon for ``text``.

	    The text is placed in a ``<p>`` inside a small HTML component; clicking the
	    icon copies that paragraph's ``innerText`` through a temporary ``<textarea>``
	    and shows an alert.
	    """
	    import html  # local import: only needed to escape the embedded text

	    # The text comes from scraped articles, PDFs or user input. Unescaped, any
	    # "<", ">" or "&" in it would be parsed as markup, which both corrupts the
	    # copied text and allows markup injection. innerText reads back the
	    # unescaped characters, so the copied result is the original text.
	    safe_text = html.escape(str(text))

	    html_code = f"""
	    <!DOCTYPE html>
	    <html lang="en">
	    <head>
	    <meta charset="UTF-8">
	    <meta name="viewport" content="width=device-width, initial-scale=1.0">
	    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.6.0/css/all.min.css" integrity="sha512-Kc323vGBEqzTmouAECnVceyQqyqdsSiqLQISBL29aUW4U/M7pSPA/gEUZQqv1cwx4OnYxTxve5UMg5GT6L4JJg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
	    <style>
	    *{{
	    margin: 0;
	    padding: 0;
	    box-sizing: border-box;
	    }}
	    .copy-button{{
	    font-size: 24px;
	    cursor: pointer;
	    color: #5b70f3;
	    transition: 0.3s ease-in-out;
	    }}
	    </style>
	    </head>
	    <body>
	    <a class="copy-button" onclick="copyText()">
	    <i class="fa-solid fa-copy"></i>
	    </a>
	    <br>
	    <br>
	    <p id="textToCopy">{safe_text}</p>

	    <script>
	    function copyText() {{
	    // Get the text from the <p> tag
	    const text = document.getElementById('textToCopy').innerText;

	    // Create a temporary <textarea> element
	    const textarea = document.createElement('textarea');
	    textarea.value = text;
	    document.body.appendChild(textarea);

	    // Select the text in the <textarea>
	    textarea.select();

	    // Execute the copy command
	    document.execCommand('copy');

	    // Remove the <textarea> element from the DOM
	    document.body.removeChild(textarea);

	    alert('Text copied');
	    }}
	    </script>
	    </body>
	    </html>

	    """

	    # height=28 leaves room for the icon only; the paragraph sits below the fold.
	    component.html(html_code,height=28)


	### copy and download button
	def Copy_download_button(article_text,article_format,article_file_name):
	    """
	    Render a copy icon and a download button side by side.

	    article_text: text placed behind the copy icon.
	    article_format: text written into the downloadable file.
	    article_file_name: file name offered for the download.

	    Failures are reported with a warning instead of being raised.
	    """
	    try:
	        ### column for copy and download article
	        Copy_btn_col,download_btn_col, blank_col_copy1, blank_col_copy2= st.columns([1,3,5,5],gap="small")

	        # The two trailing columns are spacers. The original entered
	        # blank_col_copy1 twice and never touched blank_col_copy2.
	        with blank_col_copy1:
	            st.text("")
	        with blank_col_copy2:
	            st.text("")

	        with Copy_btn_col:
	            copy_text(article_text)

	        with download_btn_col:
	            download_text(text=article_format,filename=article_file_name)
	    except Exception as e:
	        # st.warning accepts only the body positionally; passing the exception as
	        # a second positional argument raised TypeError inside this handler.
	        st.warning(f"Something went wrong...\n\n{e}",icon="⚠️")


	### setting page layout
	# Page title, icon, collapsed sidebar and wide layout for the whole app.
	st.set_page_config(
	page_title="GenAi Summarizer",
	page_icon="🤗",
	initial_sidebar_state="collapsed",
	layout="wide"
	)


	#### app settings css
	# Global stylesheet, injected through insert_css.
	if __name__=="__main__":
	insert_css("cssfiles/app.css")


	### Hugging Face models
	# Maps the short names offered in the sidebar selectbox to model checkpoint ids.
	Hugingface_modals = {
	"google-pegasus":"google/pegasus-xsum",
	"facebook-bart":"facebook/bart-large-cnn",
	"t5-base":"t5-base"
	}


	### summarization modal
	def Hugingface_summarization_modal(summary_text,modal_name,maximum_length):
	    """
	    Summarize ``summary_text`` with a Hugging Face seq2seq model.

	    summary_text: text to summarize.
	    modal_name: short model name: "google-pegasus", "facebook-bart" or "t5-base".
	    maximum_length: passed to the pipeline as ``min_length``; ``max_length`` is
	        ``maximum_length + 20``.

	    Returns the generated summary string. On any failure (including an unknown
	    ``modal_name``) a warning is shown and ``None`` is returned.
	    """
	    checkpoint_by_name = {
	        "google-pegasus": "google/pegasus-xsum",
	        "facebook-bart": "facebook/bart-large-cnn",
	        "t5-base": "t5-base",
	    }
	    try:
	        use_modal = checkpoint_by_name.get(modal_name) ### modal name
	        if use_modal is None:
	            # Fail with a readable message rather than handing None to from_pretrained.
	            raise ValueError(f"Unknown summarization modal: {modal_name!r}")

	        auto_tokenizer = AutoTokenizer.from_pretrained(use_modal) ### using autokenizer for pretrained modal
	        auto_modal = AutoModelForSeq2SeqLM.from_pretrained(use_modal)

	        ### creating pipeline
	        summarizer = pipeline("summarization",model=auto_modal,tokenizer=auto_tokenizer)

	        summary_generate = summarizer( ### summarizer
	            summary_text,max_length=maximum_length+20,
	            min_length=maximum_length,
	            do_sample=False
	        )

	        return summary_generate[0]['summary_text']

	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something went wrong...\n\n{e}",icon="⚠️")
	        return None




	### displaying modals
	# Renders a small HTML badge naming the selected summarization model.
	# modal_text is compared against the three short names "google-pegasus",
	# "facebook-bart" and "t5-base"; any other value renders no badge.
	@st.cache_data
	def Modal_Level(modal_text):
	if modal_text == "google-pegasus":
	st.markdown(
	f"""
	<div class="google-modal">
	<span style="font-size: 17px; color: #fff;">
	Maodal-
	</span>
	google/pegasus-xsum
	</div>
	""",unsafe_allow_html=True
	)

	elif modal_text == "facebook-bart":
	st.markdown(
	f"""<div class="facebook-modal">
	<span style="font-size: 17px; color: #fff;">
	Maodal-
	</span>
	facebook/bart-large-cnn
	</div>
	""",unsafe_allow_html=True
	)

	elif modal_text == "t5-base":
	st.markdown(
	f"""<div class="t5-modal">
	<span style="font-size: 17px; color: #fff;">
	Maodal-
	</span>
	t5-base
	</div>
	""",unsafe_allow_html=True
	)
	# Stylesheet for the badge classes used above.
	# NOTE(review): indentation was lost in this copy, so it is unclear whether this
	# guard sits inside Modal_Level or at module level - confirm against the original.
	if __name__=="__main__":
	insert_css("cssfiles/modal.css")



	#### creating sidebar
	app_sidebar = st.sidebar

	# Sidebar header: app name and developer credit.
	with app_sidebar:
	st.text("")
	st.subheader("GenAi Summarizer🤖")
	st.write("Developer: Nishant Maity")
	st.text("")
	st.text("")

	### creating menu bar
	# Main_menu holds the selected option label; the page sections below compare
	# against these exact strings.
	Main_menu = option_menu(
	menu_title="",
	options=["Article Summarizer","Text Summarizer","PDF Summarizer","App Info"],
	icons=["chat-dots","card-heading","file-earmark-pdf","person-circle"],
	default_index=0,
	key="Menu Bar"
	)
	st.text("")

	### select modal for text and article summarizer
	# Summarizer_modal is only assigned for these two menu entries.
	if Main_menu == "Article Summarizer" or Main_menu == "Text Summarizer":

	# index=1 preselects the second key of Hugingface_modals ("facebook-bart").
	Summarizer_modal = st.selectbox(
	label="Select Modal",
	options=np.array(list(Hugingface_modals.keys())),
	index=1,
	key="Modals"
	)

	#### selecting number or paragraph for article summarizer
	# Number_of_article_paragraph (1-10, default 2) is only assigned for the
	# "Article Summarizer" entry.
	if Main_menu == "Article Summarizer":
	with app_sidebar:
	st.text("")
	st.text("")

	Number_of_article_paragraph = st.slider(
	label="Number of paragraph",
	min_value=1,max_value=10,
	step=1,value=2,
	key="Number of paragraph"
	)

	# Button that opens the tutorial dialog through the watch_tutorial callback.
	with app_sidebar:
	st.button(
	label="Watch App Tutorial",
	use_container_width=True,
	on_click=watch_tutorial
	)


	##### article summarizer functions

	##### naive bayes text classification function

	def is_url(text):
	    """
	    Return True when ``text`` starts with an ``http://`` / ``https://`` URL or a
	    ``www.`` address, False otherwise.

	    Only the start of the string is examined; a URL appearing after other
	    words does not count.
	    """
	    # The previous pattern had every alternation bar escaped (``\|``), which
	    # makes it a literal "|" character, so no real URL could ever match and
	    # every input fell through to the statistical classifier.
	    url_pattern = re.compile(r'(?:https?://|www\.)\S+', re.IGNORECASE)
	    return bool(url_pattern.match(text))


	# Train a model for text vs URL classification
	def train_model():
	    """
	    Train a tiny Naive Bayes classifier that labels input as URL (1) or text (0).

	    The model is a ``CountVectorizer`` + ``MultinomialNB`` pipeline fitted on the
	    training part of a fixed seven-sample dataset.

	    Returns the fitted pipeline, or ``None`` after showing an error if training
	    fails.
	    """
	    #### dataset (normal text and URLs)
	    data = [
	        ('This is a normal sentence.', 'text'),
	        ('www.google.com', 'url'),
	        ('Check out this website', 'text'),
	        ('https://www.example.com', 'url'),
	        ('Machine learning is fun', 'text'),
	        ('http://openai.com', 'url'),
	        ('Python is a great language', 'text'),
	    ]
	    try:
	        texts = [sample for sample, _ in data]
	        labels = [1 if kind == 'url' else 0 for _, kind in data] ## 1 for url, 0 for text

	        ##### modal training
	        # Same split and seed as before so the fitted model is unchanged; the
	        # held-out part was only used for score calls whose results were discarded.
	        X_train, _, y_train, _ = train_test_split(texts, labels, test_size=0.2, random_state=42)

	        model = make_pipeline(CountVectorizer(), MultinomialNB())
	        model.fit(X_train, y_train) #### Train the model
	        return model

	    except Exception as e:
	        # st.error accepts only the body positionally; passing the exception as a
	        # second positional argument raised TypeError inside this handler.
	        st.error(f"Error...\n\n{e}",icon="⚠️")
	        return None



	############################### article summarizer


	# "Article Summarizer" page: title and the search / URL input box.
	if Main_menu == "Article Summarizer":

	# The two outer columns are left empty; the middle one holds the page content.
	blank_article1, article_column, blank_article2 = st.columns([2,8,2],gap="small")

	with blank_article1: ### blank space
	pass
	with blank_article2: ### blank space
	pass

	#### main app column
	with article_column:

	#### app title
	st.text("")
	App_Title = colored_header(
	label="Web Article Summarizer 📑",
	color_name="blue-green-70",
	description="Search or paste url"
	)

	# Text_input is later classified as either a URL or a search phrase.
	Text_input = st.text_input(
	label="Search or paste url",
	placeholder="machine learning, java url- https://www.example.com"
	)

	### max slider value
	def max_length_slider_value(max_length)->int:
	    """
	    Map a paragraph count (1-10) to the upper bound of the "max length" slider.

	    Returns ``None`` for any value outside 1-10.
	    """
	    upper_bound_by_paragraphs = {
	        1: 90,
	        2: 150,
	        3: 250,
	        4: 380,
	        5: 470,
	        6: 600,
	        7: 750,
	        8: 900,
	        9: 1200,
	        10: 1360,
	    }
	    return upper_bound_by_paragraphs.get(max_length)

	@st.cache_data
	def Default_max_length(default_value):
	    """
	    Pick a random default for the "max length" slider.

	    ``default_value`` is the paragraph count (1-10). Six integers are drawn from
	    that count's range (upper bound exclusive) and one of them is returned.
	    Returns ``None`` for any value outside 1-10.
	    """
	    draw_range_by_paragraphs = {
	        1: (30, 65),
	        2: (50, 130),
	        3: (70, 210),
	        4: (140, 310),
	        5: (200, 390),
	        6: (230, 490),
	        7: (280, 590),
	        8: (350, 750),
	        9: (450, 1050),
	        10: (560, 1100),
	    }
	    if default_value not in draw_range_by_paragraphs:
	        return None

	    low, high = draw_range_by_paragraphs[default_value]
	    candidates = np.random.randint(low, high, 6)
	    return random.choice(candidates)




	# Control row: generate button, summarizer toggle and the model badge.
	Button_column, Toggle_summary_btn, Modal_display = st.columns([1,1,3],gap="small")


	# article_summarizer(max_length)
	with Button_column:
	### generate article button
	Generate_btn = st.button(label="Generate Article")

	with Toggle_summary_btn:
	### if on then it generates summary
	summary_on = st.toggle(
	label="Summarizer",
	value=False,
	key="Summarizer on off"
	)

	# A toast announces the current mode.
	if summary_on:
	st.toast(body="Summarizer Mode on",icon="📑")
	else:
	st.toast(body="Scraping Mode",icon="📰")

	with Modal_display:

	# The model badge is only shown while the summarizer toggle is on.
	if summary_on:
	Modal_Level(Summarizer_modal)
	else:
	pass
	# max_length_article is only assigned while the toggle is on; its upper bound
	# and default both derive from the sidebar paragraph count.
	if summary_on:
	max_length_article = st.slider(
	label="max length",
	min_value=10,max_value=max_length_slider_value(Number_of_article_paragraph),
	key="max length",value=Default_max_length(Number_of_article_paragraph)
	)


	################################################################################################


	### article scraper function
	def article_scraper(article_url):
	    """
	    Scrape a web article, render it, and optionally render a summary of it.

	    Downloads and parses ``article_url`` with newspaper's ``Article``, shows the
	    title, publish date and authors, then the first
	    ``Number_of_article_paragraph`` paragraphs (bracketed citation numbers such
	    as ``[12]`` removed) with copy / download buttons. While the ``summary_on``
	    toggle is set, the cleaned text is summarized with ``Summarizer_modal`` and
	    a small details table is shown.

	    Failures in that flow are caught and reported with two error animations
	    and a warning. Nothing is returned.
	    """
	    import html  # local import: escapes the scraped title before it is embedded in HTML

	    def show_error_animation(animation_path, height, key):
	        """Render one lottie error animation; warn instead of failing if it cannot load."""
	        try:
	            st_lottie(
	                animation_source=insert_lottie_animation(animation_path),
	                speed=1,
	                reverse=False,loop=True,
	                quality="high",
	                height=height,
	                width=400,
	                key=key
	            )
	        except Exception as animation_err:
	            # Deliberately not named ``err``: rebinding that name in a nested
	            # handler unbinds the outer exception that is reported afterwards.
	            st.warning(f"something went wrong...\n\n{animation_err}",icon="⚠️")

	    try:
	        ### generating article summary
	        def get_top_paragraphs(text, num_paragraphs=Number_of_article_paragraph):
	            """
	            Return the first ``num_paragraphs`` paragraphs of ``text`` that have
	            more than 12 words, joined by blank lines.
	            """
	            paragraphs = text.split('\n\n')
	            valid_paragraphs = [p.strip() for p in paragraphs if len(p.strip().split()) > 12]
	            return '\n\n'.join(valid_paragraphs[:num_paragraphs])

	        def remove_bracketed_numbers(text)->str:
	            """Remove bracketed numbers such as ``[3]`` from ``text``."""
	            return re.sub(r'\[\d+\]', '', text)

	        def clean_output_text(text:str)->str:
	            """
	            it gives clean text without emojies,
	            no ascii values english text
	            """
	            return clean(
	                text=text,fix_unicode=True,
	                to_ascii=True,no_emoji=True,
	                lang="en",no_line_breaks=False,
	                keep_two_line_breaks=True
	            )

	        article = Article(article_url) ### article object
	        article.download()
	        article.parse()
	        nltk.download("punkt")
	        article.nlp()

	        st.markdown("<h4>Article</h4>",unsafe_allow_html=True)
	        st.text("")
	        st.text("")

	        ### article title
	        # The title comes from the scraped page and is rendered with
	        # unsafe_allow_html, so it must be escaped.
	        st.markdown(
	            f"\n<h6><b>{html.escape(str(article.title))}</b></h6>\n",
	            unsafe_allow_html=True
	        )

	        article_publishdate = article.publish_date ### article publish date
	        if article_publishdate is not None:
	            st.text("published on - "+str(article_publishdate))

	        article_authors = article.authors #### article authors
	        if article_authors:
	            st.write(", ".join(map(str, article_authors)))

	        cleaned_article_text = remove_bracketed_numbers(get_top_paragraphs(article.text))
	        st.session_state.clean_text = cleaned_article_text

	        ### Print the cleaned text
	        display_text = clean_output_text(cleaned_article_text)
	        st.write(display_text)
	        st.text("")
	        st.text("")

	        ### copy download button
	        Article_filename = f"{article.title}.doc"

	        # Built line by line so source indentation never leaks into the download.
	        Article_text_format = "\n".join([
	            "",
	            "\n\n\n",
	            str(article.title),
	            f"published on - {str(article_publishdate)}",
	            "Authors - " + ", ".join(map(str, article_authors)),
	            "\n\n\n",
	            str(cleaned_article_text),
	            "",
	        ])

	        if __name__=="__main__":
	            Copy_download_button(
	                article_text=display_text,
	                article_format=Article_text_format,
	                article_file_name=Article_filename
	            )

	        st.text("")

	        if summary_on:
	            st.markdown("<h4>Article Summary</h4>",unsafe_allow_html=True)

	            #### summarization modal
	            summarized_article_text = None
	            with st.spinner("Generating Summary..."):
	                if __name__=="__main__":
	                    summarized_article_text = Hugingface_summarization_modal(
	                        summary_text=display_text,
	                        modal_name=Summarizer_modal,
	                        maximum_length=max_length_article
	                    )

	            # The summarizer returns None when it fails. Stop here instead of
	            # crashing on ``None.split()`` while building the details table.
	            if summarized_article_text is None:
	                return

	            st.write(summarized_article_text)
	            st.text("")
	            st.text("")

	            summary_format = "\n".join([
	                "",
	                "",
	                "\n\n",
	                f"{article.title}",
	                "\n\n\n",
	                f"{summarized_article_text}",
	                "",
	            ])

	            #### copy or download summary button
	            if __name__=="__main__":
	                Copy_download_button(
	                    article_text=summarized_article_text,
	                    article_file_name=f"{article.title}-summary.doc",
	                    article_format=summary_format
	                )

	            ### summarization details
	            summarization_details = {
	                "Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
	                "Output":[
	                    f"{Summarizer_modal}",
	                    f"Length - {len(cleaned_article_text.split())}",
	                    f"Length - {len(summarized_article_text.split())}",
	                    f"Tokens Used - {max_length_article}"
	                ]
	            }

	            summarization_details_df = pd.DataFrame(
	                data=summarization_details,
	                index=["Hugingface Modal","No. words","No. Words","Max Length"]
	            )

	            st.text("")
	            st.text("")
	            st.text("")
	            st.dataframe(summarization_details_df,use_container_width=True)

	    except Exception as err:
	        ### 404 error animation
	        Error_404_col, page_not_found_col = st.columns(2)

	        with Error_404_col:
	            show_error_animation("lottie_animations/error-404.json", 315, "404 error")

	        with page_not_found_col:
	            show_error_animation("lottie_animations/page-not-found.json", 265, "page not found")

	        st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")

	def article_summarizer(summary_length):
	    """Write ``summary_length`` to the page with ``st.write``."""
	    st.write(summary_length)


	def check_url_exists(url, timeout=10):
	    """
	    Return True when a HEAD request to ``url`` (following redirects) answers
	    with a status code below 400, False otherwise.

	    url: address to probe.
	    timeout: seconds to wait for the server before giving up.

	    Any ``requests`` error (connection failure, timeout, malformed URL) yields
	    False rather than an exception.
	    """
	    try:
	        # Without a timeout requests waits indefinitely on an unresponsive host,
	        # which would freeze the page render.
	        response = requests.head(url, allow_redirects=True, timeout=timeout)
	    except requests.exceptions.RequestException:
	        return False
	    return response.status_code < 400


	########### link classified article
	def link_classified(text):
	    """
	    Treat ``text`` as an article URL: scrape and render it, then show a
	    "Visit Article" link when the URL responds, or a warning when it does not.

	    Input without a scheme (for example ``www.example.com``) is prefixed with
	    ``https://`` first.
	    """
	    try:
	        article_url_link = f"{text}".strip() ### url to scrap
	        # Scheme-less addresses are classified as URLs upstream, but neither the
	        # scraper nor requests can fetch them (requests raises MissingSchema), so
	        # they previously always ended in "Url does not exist...".
	        if not re.match(r'[a-zA-Z][a-zA-Z0-9+.\-]*://', article_url_link):
	            article_url_link = f"https://{article_url_link}"

	        if __name__=="__main__":
	            article_scraper(article_url_link)
	        st.text("")
	        st.text("")

	        if check_url_exists(article_url_link):
	            st.link_button(label="Visit Article",url=article_url_link)
	        else:
	            st.warning("Url does not exist...",icon="⚠️")

	        st.text("")
	        st.text("")
	        st.text("")
	        st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)

	    except Exception as err:
	        st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")



	####$ text classified article
	def text_classified(text):
	    """
	    Treat ``text`` as a topic name: scrape and render the English Wikipedia
	    page for it, then show a "Visit Article" link when the page responds, or a
	    warning when it does not.

	    Surrounding whitespace is dropped and inner spaces become underscores when
	    the page URL is built.
	    """
	    try:
	        # strip first so a trailing space does not become a trailing underscore
	        url_text = text.strip().replace(" ","_")
	        article_url = f"https://en.wikipedia.org/wiki/{url_text}" ### url to scrap
	        if __name__=="__main__":
	            article_scraper(article_url)
	        st.text("")
	        st.text("")

	        if check_url_exists(article_url):
	            st.link_button(label="Visit Article",url=article_url)
	        else:
	            st.warning("Url does not exist...",icon="⚠️")

	        st.text("")
	        st.text("")
	        st.text("")
	        st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)

	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something went wrong...\n\n{e}",icon="⚠️")



	############################################################################################

	### j query animation
	# Idle state: show the particle animation until "Generate Article" is pressed
	# with a non-empty input.
	if not Generate_btn or Text_input.strip() == "":

	    try:
	        def particle(Js_file):
	            """Embed the HTML document stored in ``Js_file`` as a 420px-high component."""
	            with open(Js_file) as f:
	                component.html(f.read(), height=420)

	        if __name__=="__main__":
	            particle("animation/particles.html")

	    except Exception as e:
	        # st.error accepts only the body positionally; passing the exception as a
	        # second positional argument raised TypeError inside this handler.
	        st.error(f"Something went wrong...\n\n{e}")

	# Active state: the button was pressed and the input is not blank.
	if Generate_btn and Text_input.strip() != "":
	    st.text("")
	    st.text("")

	    ### Function to classify the input text
	    def classify_input(text, model):
	        """
	        Route ``text`` to the URL flow or the topic flow.

	        The regex check ``is_url`` is tried first; otherwise ``model`` decides
	        (prediction 1 means URL). Failures are reported with ``st.error``.
	        """
	        try:
	            if is_url(text) or model.predict([text])[0] == 1:
	                link_classified(text)
	            else:
	                text_classified(text)
	        except Exception as e:
	            # st.error accepts only the body positionally; the old call with the
	            # exception as a second positional argument raised TypeError here.
	            st.error(f"Error...\n\n{e}",icon="⚠️")

	    with st.spinner("Generating Article..."):
	        if __name__=="__main__":
	            model = train_model()
	            classify_input(Text_input, model)



	####################################################################################################


	################################# Text summarizer


	# "Text Summarizer" page: paste text, choose a length, generate a summary.
	if Main_menu == "Text Summarizer":

	    blank_text_sum1, text_summarizer_col, blank_text_sum2 = st.columns([2,8,2],gap="small")

	    ### blank columns
	    with blank_text_sum1:
	        pass
	    with blank_text_sum2:
	        pass

	    ### text summarizer app column
	    with text_summarizer_col:
	        #### app title
	        st.text("")
	        text_summarizer_Title = colored_header(
	            label="Text Summarizer 📄",
	            color_name="violet-70",
	            description="enter or paste text hear"
	        )

	        # Built from explicit lines so source indentation never leaks into the
	        # placeholder shown in the text area.
	        placeholder_text = (
	            "write or paste your text hear\n"
	            "paragraph length should be greater then 30 words\n"
	            "to generate output tap on screen or press ctrl+enter\n"
	        )

	        ### input box
	        text_summarizer_input = st.text_area(
	            label="Enter Text Hear",
	            placeholder=placeholder_text,
	            height=340,
	            key="text summarizer"
	        )
	        Modal_Level(Summarizer_modal)

	        if text_summarizer_input.strip() == "":

	            try:
	                #### writing animation
	                write_hear_animation = insert_lottie_animation("lottie_animations/write-hear.json")
	                st_lottie(
	                    animation_source=write_hear_animation,
	                    speed=1,
	                    reverse=False,loop=True,
	                    quality="medium",
	                    height=165,
	                    width=240,
	                    key="write hear"
	                )
	            except Exception as err:
	                # st.warning accepts only the body positionally; the old call with
	                # the exception as a second positional argument raised TypeError.
	                st.warning(f"something went wrong...\n\n{err}",icon="⚠️")

	        # NOTE(review): the check rejects fewer than 20 words, the message says 35
	        # and the placeholder says 30 - confirm which threshold is intended.
	        elif len(text_summarizer_input.split()) < 20:
	            st.warning("paragraph should be greater than 35 words",icon="✏️")

	        else:

	            def word_token_maxvalue(text:str)->int:
	                """Return the number of NLTK word tokens in ``text``."""
	                return len(word_tokenize(text))

	            @st.cache_data
	            def random_value_text(text:str)->int:
	                """Pick a random slider default between 10 and the token count (exclusive)."""
	                random_value = np.random.randint(
	                    10,word_token_maxvalue(text),6
	                )
	                return random.choice(random_value)

	            def clean_data_for_summarization(text:str)->str:
	                """Normalize ``text`` with ``clean`` before it is summarized or displayed."""
	                return clean(
	                    text=text,fix_unicode=True,
	                    to_ascii=True,no_emoji=True,
	                    lang="en",no_line_breaks=False,
	                    keep_two_line_breaks=True
	                )

	            text_Max_length = st.slider(
	                label="Max length",
	                min_value=10,
	                max_value=word_token_maxvalue(text_summarizer_input),
	                key="text summarizer max length",
	                step=1,value=random_value_text(text_summarizer_input)
	            )

	            Generate_text_summary = st.button(
	                label="Generate summary",key="text summary"
	            )

	            try:
	                #### writing loading
	                writing_loading_animation = insert_lottie_animation("lottie_animations/writing-loading.json")
	                summary_generating_animation = st_lottie_spinner(
	                    animation_source=writing_loading_animation,
	                    speed=2,
	                    reverse=False,loop=True,
	                    quality="medium",
	                    height=165,
	                    width=240,
	                    key="writing generating"
	                )
	            except Exception as err:
	                st.warning(f"something went wrong...\n\n{err}",icon="⚠️")
	                # Fall back to a plain spinner so the ``with`` below still has a
	                # context manager; previously the name stayed undefined and
	                # generating a summary raised NameError.
	                summary_generating_animation = st.spinner("Generating Summary...")

	            #### initilization of modal
	            if Generate_text_summary:

	                if __name__=="__main__":

	                    ##### summary generation
	                    with summary_generating_animation:

	                        ### modal
	                        Text_Summary_output = Hugingface_summarization_modal(
	                            summary_text=clean_data_for_summarization(text_summarizer_input),
	                            modal_name=Summarizer_modal,
	                            maximum_length=text_Max_length
	                        )

	                    # The summarizer returns None when it fails. Skip the result
	                    # section instead of crashing on ``None.split()`` below.
	                    if Text_Summary_output is not None:

	                        ##### summary displaying and copy
	                        st.text("")
	                        st.text("")
	                        st.markdown("<h4>Generated Summary</h4>",unsafe_allow_html=True)
	                        st.text("")
	                        st.write(Text_Summary_output)
	                        st.text("")

	                        copy_text(Text_Summary_output)
	                        st.text("")
	                        st.text("")

	                        ###### original text desplay and copy
	                        st.markdown("<h4>Original Text</h4>",unsafe_allow_html=True)
	                        st.text("")
	                        original_text = clean_data_for_summarization(text_summarizer_input)
	                        st.write(original_text)
	                        st.text("")
	                        copy_text(original_text)

	                        st.text("")
	                        st.text("")
	                        st.text("")

	                        ### summarization details
	                        text_summarization_details = {
	                            "Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
	                            "Output":[
	                                f"{Summarizer_modal}",
	                                f"Length - {len(text_summarizer_input.split())}",
	                                f"Length - {len(Text_Summary_output.split())}",
	                                f"Tokens Used - {text_Max_length}"
	                            ]
	                        }

	                        summarization_details_df = pd.DataFrame(
	                            data=text_summarization_details,
	                            index=["Hugingface Modal","No. words","No. Words","Max Length"]
	                        )

	                        st.text("")
	                        st.text("")
	                        st.text("")
	                        st.dataframe(summarization_details_df,use_container_width=True)
	                        st.text("")
	                        st.text("")
	                        st.text("")
	                        st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)



	##############################################################################################################

	############################## pdf summarizer


	#### pdf and text summarizer functions


	#### displaying uploaded pdf file
	def display_pdf_file(uploaded_file):
	    """
	    Save an uploaded PDF under ``data/`` and show it in an embedded viewer.

	    uploaded_file: upload object providing ``.name`` and ``.getbuffer()``.

	    Failures are reported with a warning instead of being raised.
	    """
	    #### saving the uploaded file
	    def save_uploadfile(save_file):
	        """Write the upload into ``data/`` and return the path it was saved to."""
	        # The directory may not exist on a fresh deployment.
	        os.makedirs("data", exist_ok=True)
	        # Keep only the base name so a crafted upload name cannot point the
	        # write outside ``data/``.
	        target_path = os.path.join("data", os.path.basename(save_file.name))
	        with open(target_path,"wb") as f:
	            f.write(save_file.getbuffer())
	        st.toast("file uploaded: {}".format(save_file.name))
	        return target_path

	    ### display pdf on screen
	    def displayPDF(pdf_file):
	        """Embed the PDF at ``pdf_file`` as a base64 ``data:`` URI inside an iframe."""
	        with open(pdf_file,"rb") as f:
	            base64_pdf = base64.b64encode(f.read()).decode("utf-8")

	        pdf_display = f"""
	        <iframe
	        src="data:application/pdf;base64,{base64_pdf}"
	        width="580" height="700"
	        type="application/pdf"
	        >
	        </iframe>
	        """

	        st.markdown(pdf_display,unsafe_allow_html=True)

	    try:
	        ### save and display file
	        displayPDF(save_uploadfile(uploaded_file))
	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")


	#### Function to extract text from a specific page using pdfminer
	def extract_text_pdfminer(pdf_file, page_number):
	    """
	    Extract the text of one page of ``pdf_file``.

	    page_number: 1-based page index.

	    Returns the page text, one stripped line per text line, each followed by a
	    newline. Only ``LTChar`` items contribute characters. When the page does
	    not exist or extraction fails, a warning is shown and an empty string is
	    returned.
	    """
	    try:
	        for i, page_layout in enumerate(extract_pages(pdf_file)):
	            if i != page_number - 1:
	                continue
	            ### Extract text elements and format them as closely as possible to the original layout
	            lines = []
	            for element in page_layout:
	                if not isinstance(element, LTTextContainer):
	                    continue
	                for text_line in element:
	                    if isinstance(text_line, LTTextLine):
	                        line = ''.join(char.get_text() for char in text_line if isinstance(char, LTChar))
	                        lines.append(line.strip() + '\n')
	            return ''.join(lines)

	        # The caller stores the result as text-area content, so return a string.
	        # Previously the return value of st.warning itself was handed back.
	        st.warning("Invalid page number.",icon="⚠️")
	        return ""
	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")
	        return ""


	###############################################


	##### clean text for summmarization task
	def uploaded_Clean_Text_Summarization(clean_text:str)->str:
	    """
	    Prepare uploaded text for summarization.

	    Removes the characters ``| ` ~ ^ $ < >`` and then normalizes the result
	    with ``clean``. If cleaning fails, a warning is shown and the input is
	    returned unchanged.
	    """
	    try:
	        pattern = r'[\|`~^$<>]'
	        cleaned_paragraph = re.sub(pattern, '', clean_text)

	        ### using clean function
	        return clean(
	            text=cleaned_paragraph,fix_unicode=True,
	            to_ascii=True,no_emoji=True,
	            lang="en",no_line_breaks=False,
	            keep_two_line_breaks=True
	        )

	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")
	        # The result used to be returned after the try block, where it was
	        # unbound on this path (UnboundLocalError). Hand back the input instead.
	        return clean_text


	### convert paragraph into tokens
	def generate_text_para_tokens(text_para:str)->int:
	    """
	    Count the word tokens of ``text_para`` after cleaning.

	    The characters ``| ` ~ # ^ $ < >`` are removed, the text is normalized with
	    ``clean`` and then split with NLTK's ``word_tokenize``.

	    Returns the token count, or 0 after showing a warning if anything fails.
	    """
	    try:
	        pattern = r'[\|`~#^$<>]'
	        cleaned_paragraph = re.sub(pattern, '', text_para)

	        #### using clean function
	        clean_para = clean(
	            text=cleaned_paragraph,fix_unicode=True,
	            to_ascii=True,no_emoji=True,
	            lang="en",no_line_breaks=False,
	            keep_two_line_breaks=True
	        )

	        return len(word_tokenize(clean_para))

	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")
	        # Keep the declared int return type on the failure path.
	        return 0



	### generates random value for slider
	@st.cache_data
	def random_text_para_value(para:str)->int:
	    """
	    Pick a random default for a "Max Length" slider over ``para``.

	    For texts of more than 20 tokens, six integers are drawn from
	    ``[20, token_count)`` and one of them is returned. For shorter texts the
	    token count itself is returned, since no such range exists. On failure a
	    warning is shown and ``None`` is returned.
	    """
	    try:
	        token_count = generate_text_para_tokens(para)
	        # np.random.randint(20, high) raises ValueError unless high > 20, which
	        # used to happen for any short text.
	        if token_count is None or token_count <= 20:
	            return token_count
	        random_value = np.random.randint(20, token_count, 6)
	        return random.choice(random_value)
	    except Exception as e:
	        # st.warning accepts only the body positionally; the old call with the
	        # exception as a second positional argument raised TypeError here.
	        st.warning(f"Something Went wrong...\n\n{e}",icon="⚠️")
	        return None


	#### PDF files summarizer
	# Renders the PDF workflow for an uploaded file in two tabs: a preview tab that
	# displays the file, and a summarizer tab that extracts text (whole document or
	# a single page) and can print, clean or summarize the extracted page text.
	def process_pdf(file):
	reader = PdfReader(file)
	page_count = len(reader.pages)

	### pdf display and information column
	pdf_display_tab, pdf_summarizer_tab = st.tabs([f"Displaying {file.name}","Pdf Summarizer"])

	####### displaying pdf on pdf display tab
	with pdf_display_tab:
	st.markdown(f"<h4>Pdf - {file.name}</h4>",unsafe_allow_html=True)

	# Left column: embedded viewer. Right column: file name, page count, static help text.
	pdf_col, pdf_info_col = st.columns([5,3],gap="medium")
	with pdf_col:
	with st.spinner("Displaying file..."):
	if __name__=="__main__":
	display_pdf_file(file)

	with pdf_info_col:
	st.write("Your File: {}".format(file.name))
	st.write(f"Number of pages: {str(page_count)}")
	st.markdown(insert_html("htmlfiles/pdf-summarizer-info.html"),unsafe_allow_html=True)


	### pdf information and intract with pdf
	with pdf_summarizer_tab:

	st.text("")
	st.markdown("<h4>Extract pdf text</h4>",unsafe_allow_html=True)

	### toggle button for extracting text
	extract_by_page_all = st.toggle(
	label="Extract whole Text",key="toggle for extract text",
	value=False
	)

	### extracting all pdf text
	if extract_by_page_all:
	st.write("Extract whole pdf Text")

	if st.button("Extract Whole Pdf",key="whole pdf text extract"):

	st.text("")
	st.text("")

	# NOTE(review): `file` was already read by PdfReader above; whether the stream
	# position must be reset before extract_text reads it again is not visible
	# here - confirm.
	with st.spinner("Extracting pdf..."):
	whole_pdf_text = extract_text(file)
	st.markdown("<h4 style='font-size: 26px'>Whole PDF Text</h4>",unsafe_allow_html=True)
	st.text("")
	st.write(whole_pdf_text)
	else:
	# Page-by-page mode: the page count is read again to bound the page-number input.
	reader = PdfReader(file)
	total_pages = len(reader.pages)
	st.write("Extract by page Number")

	pdf_page_no_col, pdf_page_noinfo_col = st.columns([3,5],gap="small")

	with pdf_page_no_col:

	### input page number
	Pdf_page_number_input = st.number_input(
	label="Select the page number",
	min_value=1, max_value=total_pages,
	value=1,key="pdf page number",step=1
	)

	with pdf_page_noinfo_col:
	st.text("")
	st.text("")
	st.write(f"Selected page: {str(Pdf_page_number_input)}")

	Extract_page_no_button = st.button(
	label="Extract Page text",
	key="Extract button for page"
	)
	st.text("")
	st.text("")

	if Extract_page_no_button:
	text_pdfminer = extract_text_pdfminer(file, Pdf_page_number_input)
	st.session_state['extracted_text'] = text_pdfminer ### Store the extracted text in session state

	# The extracted text lives in session state so it is still available on later
	# runs when one of the action buttons below is pressed.
	if 'extracted_text' in st.session_state:
	Pdf_file_text = st.text_area(
	label=f"Text data of {Pdf_page_number_input} page",
	value= st.session_state['extracted_text'],
	height=400
	)
	st.session_state['extracted_text'] = Pdf_file_text # Update the text in session state based on user's input

	#### pdf summarizer
	# NOTE(review): max_value is the token count of the extracted text; for a very
	# short page it can fall below min_value=10 - confirm how st.slider reacts.
	st.text("")
	Max_length_pdf_slider = st.slider(
	label="Max Length",key="Pdf summarizer slider",
	min_value=10,max_value=generate_text_para_tokens(Pdf_file_text),
	value=random_text_para_value(Pdf_file_text)
	)
	st.text("")

	# Three action buttons followed by two empty spacer columns.
	upload_Pdf_summary_btn_col, upload_Pdf_print_btn_col, upload_clean_Pdf_print_btn_col, blank_Pdf_col1, blank_Pdf_col2 = st.columns(
	[4,4,4,7,3],gap="small"
	)

	with blank_Pdf_col1:
	pass
	with blank_Pdf_col2:
	pass

	with upload_Pdf_summary_btn_col:
	Generate_upload_pdf_summary_btn = st.button(
	label="Generate Summary",
	key="Generate summary of uploaded text pdf"
	)

	with upload_clean_Pdf_print_btn_col:
	Upload_clean_pdf_btn = st.button(
	label="Print Clean Text",
	key="Print clean pdf file"
	)


	with upload_Pdf_print_btn_col:
	upload_pdf_print_button = st.button(
	label="Print Uploaded Text",
	key="Print uploadded pdf"
	)

	### clean text
	# The cleaning helper is called twice: once for display, once for the copy icon.
	if Upload_clean_pdf_btn:
	with st.spinner("Generating Clean Text..."):
	st.text("")
	st.text("")
	st.markdown("<h4 style='font-size: 26px'>Clean Text</h4>",unsafe_allow_html=True)
	st.text("")
	st.write(uploaded_Clean_Text_Summarization(Pdf_file_text))
	st.text("")
	copy_text(uploaded_Clean_Text_Summarization(Pdf_file_text))
	st.text("")
	st.text("")
	st.text("")
	st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)

	### uploaded text
	# Shows the text-area content as is, without cleaning.
	elif upload_pdf_print_button:
	with st.spinner("Generating Uploaded Text..."):
	st.text("")
	st.text("")
	st.markdown("<h4 style='font-size: 26px'>Uploaded Text</h4>",unsafe_allow_html=True)
	st.text("")
	st.text(Pdf_file_text)
	st.text("")
	copy_text(Pdf_file_text)
	st.text("")
	st.text("")
	st.text("")
	st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)

	### generating summary
	# The model is fixed to "facebook-bart" here rather than taken from a selectbox.
	# NOTE(review): the summarizer's result is written and copied without a None
	# check - confirm what it returns on failure.
	elif Generate_upload_pdf_summary_btn:
	st.text("")
	with st.spinner("Generating Summary..."):
	st.text("")
	if __name__=="__main__":
	Uploded_Pdf_file_Summary = Hugingface_summarization_modal(
	summary_text=uploaded_Clean_Text_Summarization(Pdf_file_text),
	maximum_length=Max_length_pdf_slider,
	modal_name="facebook-bart"
	)
	st.markdown("<h4 style='font-size: 26px'>Summary</h4>",unsafe_allow_html=True)
	st.text("")

	st.write(Uploded_Pdf_file_Summary)
	st.text("")
	copy_text(Uploded_Pdf_file_Summary)
	st.text("")
	st.text("")
	st.text("")
	st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>",unsafe_allow_html=True)




	#################################################


	##### text file summarizer
	def _render_text_file_result(heading, body, preformatted=False):
	    """Render one result section: heading, body, copy widget and credit footer.

	    Args:
	        heading: Text placed inside the ``<h4>`` section title.
	        body: The string to display and to hand to ``copy_text``.
	        preformatted: When true the body is shown with ``st.text``;
	            otherwise it is shown with ``st.write``.
	    """
	    st.markdown(f"<h4 style='font-size: 26px'>{heading}</h4>", unsafe_allow_html=True)
	    st.text("")
	    if preformatted:
	        st.text(body)
	    else:
	        st.write(body)
	    st.text("")
	    copy_text(body)
	    st.text("")
	    st.text("")
	    st.text("")
	    st.markdown("<h6 style='text-align: center;'>Created by Nishant Maity</h6>", unsafe_allow_html=True)


	##### text file summarizer
	def process_text(file):
	    """Show an uploaded text file and offer clean / raw / summary views of it.

	    The file content is placed in an editable text area. If the (possibly
	    edited) text has fewer than 20 whitespace-separated words a warning is
	    shown and nothing else is rendered. Otherwise a "Max Length" slider and
	    three buttons are drawn; the pressed button decides which result
	    section is rendered below them.

	    Args:
	        file: Uploaded file object; must provide ``read()`` returning bytes
	            and a ``name`` attribute (presumably a Streamlit
	            ``UploadedFile`` -- confirm against the caller).

	    Returns:
	        None. All output is written to the Streamlit page.
	    """
	    # Decode leniently: a strict UTF-8 decode raises UnicodeDecodeError on
	    # files saved in another encoding and takes the whole page down.
	    # Undecodable bytes become U+FFFD; valid UTF-8 input is unaffected.
	    file_text = file.read().decode("utf-8", errors="replace")

	    # Drop the real extension instead of assuming it is 4 characters long.
	    display_name = os.path.splitext(file.name)[0]

	    st.text("")
	    st.markdown("<h4 style='font-size: 26px'>Text file</h4>", unsafe_allow_html=True)

	    # Editable copy of the file content; every later step uses this value,
	    # so user edits are what gets cleaned / printed / summarized.
	    edited_text = st.text_area(
	        label=f"{display_name} text data",
	        value=file_text, key="text file data",
	        height=400
	    )
	    st.write(f"{display_name} Edit your file press ctrl+enter")

	    # Fewer than 20 words: warn and stop before drawing any controls.
	    if len(edited_text.split()) < 20:
	        st.warning("Summarization Task failed\nnot enough amount of text...", icon="⚠️")
	        return

	    st.text("")
	    # Upper bound and initial value both come from helpers fed with the text.
	    max_summary_length = st.slider(
	        label="Max Length", min_value=10,
	        max_value=generate_text_para_tokens(edited_text),
	        step=1, key="paragraph length",
	        value=random_text_para_value(edited_text)
	    )
	    st.text("")

	    # Three button columns followed by two columns that receive no content.
	    summary_col, print_col, clean_col, _, _ = st.columns(
	        [4, 4, 4, 7, 3], gap="small"
	    )

	    with summary_col:
	        summary_clicked = st.button(
	            label="Generate Summary",
	            key="Generate summary of uploaded text"
	        )

	    with clean_col:
	        clean_clicked = st.button(
	            label="Print Clean Text",
	            key="Print clean text file"
	        )

	    with print_col:
	        print_clicked = st.button(
	            label="Print Uploaded Text",
	            key="Print uploadded text"
	        )

	    if clean_clicked:
	        with st.spinner("Generating Clean Text..."):
	            st.text("")
	            st.text("")
	            # Clean once and reuse, so the displayed and the copyable text
	            # are guaranteed to be the same string.
	            cleaned_text = uploaded_Clean_Text_Summarization(edited_text)
	            _render_text_file_result("Clean Text", cleaned_text)

	    elif print_clicked:
	        with st.spinner("Generating Uploaded Text..."):
	            st.text("")
	            st.text("")
	            _render_text_file_result("Uploaded Text", edited_text, preformatted=True)

	    elif summary_clicked:
	        st.text("")
	        with st.spinner("Generating Summary..."):
	            st.text("")
	            # No `__name__` guard here: inside a function it only made the
	            # summary variable undefined when the module was not `__main__`.
	            summary = Hugingface_summarization_modal(
	                summary_text=uploaded_Clean_Text_Summarization(edited_text),
	                maximum_length=max_summary_length,
	                modal_name="facebook-bart"
	            )
	            _render_text_file_result("Summary", summary)



	# "PDF Summarizer" page: this section runs only when Main_menu equals
	# "PDF Summarizer".
	# NOTE(review): indentation is not visible in this view of the file, so the
	# nesting described in the comments below is inferred from syntax alone --
	# confirm against the properly indented source.
	if Main_menu == "PDF Summarizer":

	### blank and app columns (width ratio 1:8:1; nothing is written into the two outer columns)
	Blank_pdf1 ,pdf_summarizer_col, Blank_pdf2 = st.columns([1,8,1],gap="small")

	with Blank_pdf1:
	pass
	with Blank_pdf2:
	pass

	with pdf_summarizer_col:
	st.text("")
	st.header("PDF Summarizer") ### app heading

	### File uploader widget, restricted to "pdf" and "txt" files
	app_file_upload = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])

	if app_file_upload is not None:

	### if pdf file (the check is on the reported MIME type, not on the file name)
	if app_file_upload.type == "application/pdf":
	# The call below is made only when this file runs as the main script.
	if __name__=="__main__":
	process_pdf(app_file_upload)

	#### if text file
	elif app_file_upload.type == "text/plain":
	# Same main-script guard as for the PDF branch.
	if __name__=="__main__":
	process_text(app_file_upload)

	# NOTE(review): presumably this `else` pairs with `if app_file_upload is not None`
	# (hint shown while nothing is uploaded); it could instead pair with the
	# MIME-type `if/elif` above -- verify in the indented source.
	else:
	st.info("Upload your pdf, text file")


	#### "App Info" page: this section runs only when Main_menu equals "App Info".
	# NOTE(review): indentation is not visible in this view of the file, so the
	# nesting described in the comments below is inferred from syntax alone --
	# confirm against the properly indented source.
	if Main_menu == "App Info":
	Blank_app_info1, App_info_col, Blank_app_info2 = st.columns([2,8,2])

	#### blank columns (nothing is written into them)
	with Blank_app_info1:
	pass
	with Blank_app_info2:
	pass

	### app info column: spacer, page header, spacer
	with App_info_col:
	st.text("")
	st.header("App Info")
	st.text("")

	# Read htmlfiles/app-info.html through insert_html() and render the returned
	# string as raw HTML (unsafe_allow_html=True). The guard lets this happen
	# only when the file runs as the main script.
	# NOTE(review): presumably still nested inside `with App_info_col` -- confirm.
	if __name__=="__main__":
	st.markdown(insert_html("htmlfiles/app-info.html"),
	unsafe_allow_html=True
	)