diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,1504 +1,1504 @@
-import streamlit as st ### importing liberaries
-from streamlit_extras.colored_header import colored_header
-from streamlit_option_menu import option_menu
-import streamlit.components.v1 as component
-from streamlit_lottie import st_lottie, st_lottie_spinner
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.model_selection import train_test_split
-from sklearn.naive_bayes import MultinomialNB
-from sklearn.pipeline import make_pipeline
-from transformers import pipeline
-from transformers import AutoTokenizer , AutoModelForSeq2SeqLM
-from newspaper import Article
-import nltk
-import nltk.downloader
-nltk.download('punkt_tab')
-from nltk.tokenize import word_tokenize
-from cleantext import clean
-from PyPDF2 import PdfReader
-import pdfminer
-from pdfminer.high_level import extract_text
-from pdfminer.high_level import extract_pages
-from pdfminer.layout import LTTextContainer, LTChar, LTTextLine
-import requests
-import json
-import numpy as np
-import pandas as pd
-import random
-import base64
-import lxml
-import lxml_html_clean
-import re
-import os
-
-
-###### main app functions
-
-### insert external css
def insert_css(css_file: str):
    """Inject an external stylesheet into the current Streamlit page.

    Args:
        css_file: Path of the CSS file to load.

    The file content is wrapped in a ``<style>`` element and emitted with
    ``unsafe_allow_html=True`` so the browser applies it to the page.
    """
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(css_file, encoding="utf-8") as f:
        # NOTE(review): the visible source emitted an empty string here and
        # never used the file contents; the <style> wrapper is the usual
        # Streamlit idiom and is assumed to be what was intended.
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
-
-### insert external html file
def insert_html(html_file):
    """Return the full text of an HTML snippet file.

    Args:
        html_file: Path of the file to read.

    Returns:
        The file content as a string, decoded as UTF-8.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(html_file, encoding="utf-8") as f:
        return f.read()
-
-### insert lottie animation json files
def insert_lottie_animation(animation_file: str):
    """Load a Lottie animation definition from a JSON file.

    Args:
        animation_file: Path of the JSON file.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(animation_file, "r", encoding="utf-8") as f:
        return json.load(f)
-
-### app tutorial video function
@st.dialog("App Tutorial")
def watch_tutorial():
    """Show the tutorial video inside a Streamlit dialog."""
    st.subheader("GenAi Summarizer🤖")

    # Read through a context manager so the file handle is always closed
    # (the previous version left it open).
    with open("app_tutorial.mp4", "rb") as video_file:
        video_bytes = video_file.read()

    st.text("")
    st.video(
        data=video_bytes,
        format="video/mp4",
        loop=True,
        autoplay=True,
    )
-
-
def download_text(text, filename):
    """Render a "Download" link that saves ``text`` as a file.

    Args:
        text: The text to offer for download.
        filename: The file name suggested to the browser.

    The text is embedded in the link as a base64 ``data:`` URI, so no file is
    written on the server.
    """
    import html

    # Encode the payload so arbitrary text survives inside an attribute.
    b64 = base64.b64encode(text.encode("utf-8")).decode("ascii")

    # The file name may come from a scraped article title; escape it before
    # placing it inside an HTML attribute.
    safe_name = html.escape(str(filename), quote=True)

    # NOTE(review): the visible source rendered only the word "Download" and
    # never used the encoded payload or the file name. The anchor below is the
    # common Streamlit data-URI idiom; any class/id that
    # cssfiles/download-article.css expects is unknown — confirm.
    href = f"""
<a href="data:text/plain;charset=utf-8;base64,{b64}" download="{safe_name}">
Download
</a>
"""

    st.markdown(href, unsafe_allow_html=True)
    if __name__ == "__main__":
        insert_css("cssfiles/download-article.css")
-
-
def copy_text(text):
    """Render ``text`` inside a small HTML component.

    Args:
        text: The text to place in the component.

    NOTE(review): the markup of the original widget is not visible in the
    source (only the ``{text}`` placeholder remains), so this keeps the
    template minimal — confirm against the intended copy-button markup.
    """
    import html

    # The text is scraped or user-supplied content, so it must be escaped
    # before being interpolated into HTML.
    safe_text = html.escape(str(text))

    html_code = f"""
 {safe_text}
 """

    component.html(html_code, height=28)
-
-
-### copy and download button
def Copy_download_button(article_text, article_format, article_file_name):
    """Lay out the copy and download controls side by side.

    Args:
        article_text: Text handed to ``copy_text``.
        article_format: Text handed to ``download_text`` as the file body.
        article_file_name: File name handed to ``download_text``.
    """
    try:
        # Two narrow columns hold the controls; two wide ones are padding.
        Copy_btn_col, download_btn_col, blank_col_copy1, blank_col_copy2 = st.columns(
            [1, 3, 5, 5], gap="small"
        )

        # The second spacer used to be written into the first blank column.
        with blank_col_copy1:
            st.text("")
        with blank_col_copy2:
            st.text("")

        with Copy_btn_col:
            copy_text(article_text)

        with download_btn_col:
            download_text(text=article_format, filename=article_file_name)
    except Exception as e:
        # st.warning takes a single body argument; the exception goes in the
        # message rather than as a second positional argument.
        st.warning(f"Something went wrong...\n\n{e}", icon="⚠️")
-
-
-### setting page layout
# Page-level settings: title, icon, wide layout, sidebar collapsed at start.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
    page_icon="🤗",
    page_title="GenAi Summarizer",
)


# Application-wide stylesheet, loaded only when executed as a script.
if __name__ == "__main__":
    insert_css("cssfiles/app.css")
-
-
-### huging face modals
# Short names offered in the sidebar, mapped to Hugging Face checkpoint ids.
Hugingface_modals = dict(
    [
        ("google-pegasus", "google/pegasus-xsum"),
        ("facebook-bart", "facebook/bart-large-cnn"),
        ("t5-base", "t5-base"),
    ]
)
-
-
-### summarization modal
@st.cache_resource(show_spinner=False)
def _load_summarizer(checkpoint):
    """Build the summarization pipeline for ``checkpoint`` once per process."""
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return pipeline("summarization", model=model, tokenizer=tokenizer)


def Hugingface_summarization_modal(summary_text, modal_name, maximum_length):
    """Summarize ``summary_text`` with one of the configured models.

    Args:
        summary_text: The text to summarize.
        modal_name: A key of ``Hugingface_modals`` (e.g. ``"facebook-bart"``).
        maximum_length: Requested length; passed as ``min_length`` and, plus
            20, as ``max_length``.

    Returns:
        The generated summary string, or ``None`` when summarization failed
        (a warning is shown in that case).
    """
    try:
        # Single source of truth for the name -> checkpoint mapping.
        checkpoint = Hugingface_modals.get(modal_name)
        if checkpoint is None:
            raise ValueError(f"Unknown summarization modal: {modal_name!r}")

        # Cached: loading the weights on every button click is very slow.
        summarizer = _load_summarizer(checkpoint)

        summary_generate = summarizer(
            summary_text,
            max_length=maximum_length + 20,
            min_length=maximum_length,
            do_sample=False,
            # Inputs longer than the model's context would otherwise fail.
            truncation=True,
        )

        return summary_generate[0]["summary_text"]

    except Exception as e:
        # st.warning takes one body argument; format the error into it.
        st.warning(f"Something went wrong...\n\n{e}", icon="⚠️")
        return None
-
-
-
-
-### displaying modals
@st.cache_data
def Modal_Level(modal_text):
    """Display which Hugging Face checkpoint the selected model maps to.

    Args:
        modal_text: A key of ``Hugingface_modals``; unknown keys render no
            label.
    """
    # One lookup replaces three copy-pasted branches that differed only in
    # the checkpoint id.
    checkpoint = Hugingface_modals.get(modal_text)

    if checkpoint is not None:
        # NOTE(review): the label was spelled "Maodal-" in every branch; it is
        # corrected to match the "Modal" spelling used elsewhere in the UI.
        # Any HTML wrapper the original had is not visible in the source.
        st.markdown(
            f"""

 Modal-

 {checkpoint}

 """,
            unsafe_allow_html=True,
        )

    if __name__ == "__main__":
        insert_css("cssfiles/modal.css")
-
-
-
-#### creating sidebar
# Sidebar: branding, page menu, model picker, paragraph slider, tutorial.
app_sidebar = st.sidebar

with app_sidebar:
    st.text("")
    st.subheader("GenAi Summarizer🤖")
    st.write("Developer: **Nishant Maity**")
    for _ in range(2):
        st.text("")

    # Page navigation menu.
    Main_menu = option_menu(
        menu_title="",
        options=["Article Summarizer", "Text Summarizer", "PDF Summarizer", "App Info"],
        icons=["chat-dots", "card-heading", "file-earmark-pdf", "person-circle"],
        default_index=0,
        key="Menu Bar",
    )
    st.text("")

    # The model picker is only offered on the two pages that use it.
    if Main_menu in ("Article Summarizer", "Text Summarizer"):
        Summarizer_modal = st.selectbox(
            label="Select Modal",
            options=np.array(list(Hugingface_modals.keys())),
            index=1,
            key="Modals",
        )

# How many paragraphs of a scraped article to keep.
if Main_menu == "Article Summarizer":
    with app_sidebar:
        for _ in range(2):
            st.text("")

        Number_of_article_paragraph = st.slider(
            label="Number of paragraph",
            min_value=1,
            max_value=10,
            step=1,
            value=2,
            key="Number of paragraph",
        )

with app_sidebar:
    st.button(
        label="Watch App Tutorial",
        use_container_width=True,
        on_click=watch_tutorial,
    )
-
-
-##### article summarizer functions
-
-##### naive bayes text classification function
-
# Compiled once at import time instead of on every call.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+|(?:www\.)[^\s]+')


def is_url(text):
    """Return True when ``text`` starts with an http(s) or ``www.`` URL.

    Args:
        text: Candidate input; surrounding whitespace is ignored.

    Returns:
        ``True`` if the stripped text begins with a URL, otherwise ``False``.
        Non-string input is never a URL.
    """
    if not isinstance(text, str):
        return False
    # The match is anchored at the start, so a pasted URL with leading
    # whitespace used to be misclassified as plain text.
    return bool(_URL_PATTERN.match(text.strip()))
-
-
-# Train a model for text vs URL classification
def train_model():
    """Fit a small bag-of-words classifier separating URLs from plain text.

    Returns:
        A fitted scikit-learn pipeline whose ``predict`` yields ``1`` for a
        URL and ``0`` for plain text, or ``None`` if training failed (an
        error is shown in that case).
    """
    try:
        # Labelled samples: (input, kind).
        data = [
            ('This is a normal sentence.', 'text'),
            ('www.google.com', 'url'),
            ('Check out this website', 'text'),
            ('https://www.example.com', 'url'),
            ('Machine learning is fun', 'text'),
            ('http://openai.com', 'url'),
            ('Python is a great language', 'text'),
        ]
        texts = [sample for sample, _ in data]
        labels = [1 if kind == 'url' else 0 for _, kind in data]  # 1 = url

        # Train on every sample. The previous train/test split held two of
        # the seven samples back only to compute scores that were discarded.
        model = make_pipeline(CountVectorizer(), MultinomialNB())
        model.fit(texts, labels)

        return model

    except Exception as e:
        # st.error takes one body argument; format the error into it.
        st.error(f"Error...\n\n{e}", icon="⚠️")
        return None
-
-
-
-############################### article summarizer
-
-
if Main_menu == "Article Summarizer":

    # Centre column flanked by two empty padding columns.
    blank_article1, article_column, blank_article2 = st.columns([2, 8, 2], gap="small")

    # Everything for this page is rendered in the centre column.
    with article_column:

        # Page title.
        st.text("")
        App_Title = colored_header(
            description="Search or paste url",
            color_name="blue-green-70",
            label="Web Article Summarizer 📑",
        )

        # Search term or article URL typed by the user.
        Text_input = st.text_input(
            placeholder="machine learning, java url- https://www.example.com",
            label="Search or paste url",
        )
-
- ### max slider value
def max_length_slider_value(max_length) -> int:
    """Return the slider upper bound for a given paragraph count.

    Args:
        max_length: Number of article paragraphs (1 to 10).

    Returns:
        The largest summary length allowed for that many paragraphs. Counts
        outside 1..10 are clamped to the nearest end instead of yielding
        ``None``, which the slider cannot accept.
    """
    # Index i holds the limit for i + 1 paragraphs.
    limits = (90, 150, 250, 380, 470, 600, 750, 900, 1200, 1360)
    position = min(max(int(max_length), 1), len(limits))
    return limits[position - 1]
-
@st.cache_data
def Default_max_length(default_value):
    """Pick a random default summary length for a paragraph count.

    Args:
        default_value: Number of article paragraphs (1 to 10).

    Returns:
        A plain ``int`` drawn uniformly from the range configured for that
        paragraph count (upper bound exclusive), or ``None`` for a count
        outside 1..10.
    """
    # (low, high) per paragraph count; replaces ten identical branches.
    ranges = {
        1: (30, 65),
        2: (50, 130),
        3: (70, 210),
        4: (140, 310),
        5: (200, 390),
        6: (230, 490),
        7: (280, 590),
        8: (350, 750),
        9: (450, 1050),
        10: (560, 1100),
    }
    bounds = ranges.get(default_value)
    if bounds is None:
        return None

    low, high = bounds
    # One draw has the same distribution as choosing among six draws.
    # Cast so callers receive a built-in int rather than a numpy scalar.
    return int(np.random.randint(low, high))
-
-
-
-
# Control row: generate button, summarizer toggle, selected-model label.
Button_column, Toggle_summary_btn, Modal_display = st.columns([1, 1, 3], gap="small")

with Button_column:
    # Starts scraping when clicked.
    Generate_btn = st.button(label="Generate Article")

with Toggle_summary_btn:
    # When on, a summary is generated in addition to the scraped text.
    summary_on = st.toggle(
        key="Summarizer on off",
        value=False,
        label="Summarizer",
    )

    toast_body, toast_icon = (
        ("Summarizer Mode on", "📑") if summary_on else ("Scraping Mode", "📰")
    )
    st.toast(body=toast_body, icon=toast_icon)

with Modal_display:
    if summary_on:
        Modal_Level(Summarizer_modal)

# Summary-length slider, only relevant in summarizer mode.
if summary_on:
    max_length_article = st.slider(
        label="max length",
        key="max length",
        min_value=10,
        max_value=max_length_slider_value(Number_of_article_paragraph),
        value=Default_max_length(Number_of_article_paragraph),
    )
-
-
-################################################################################################
-
-
- ### article scraper function
def _render_error_animation(animation_path, height, key):
    """Show one Lottie error animation; warn instead of raising on failure."""
    try:
        st_lottie(
            animation_source=insert_lottie_animation(animation_path),
            speed=1,
            reverse=False,
            loop=True,
            quality="high",
            height=height,
            width=400,
            key=key,
        )
    except Exception as animation_err:
        # Uses its own name so the caller's exception variable stays bound.
        st.warning(f"something went wrong...\n\n{animation_err}", icon="⚠️")


def article_scraper(article_url):
    """Scrape a web article, display it and optionally summarize it.

    Args:
        article_url: URL of the article to download and parse.

    Shows the title, publish date, authors and the first
    ``Number_of_article_paragraph`` paragraphs, followed by copy/download
    controls. When ``summary_on`` is set, a summary and a details table are
    shown as well. Any failure is reported with the error animations and a
    warning instead of being raised.
    """
    try:
        def get_top_paragraphs(text, num_paragraphs=Number_of_article_paragraph):
            """Keep the first paragraphs that have more than 12 words."""
            stripped = (p.strip() for p in text.split('\n\n'))
            valid_paragraphs = [p for p in stripped if len(p.split()) > 12]
            return '\n\n'.join(valid_paragraphs[:num_paragraphs])

        def remove_bracketed_numbers(text) -> str:
            """Drop citation markers such as ``[12]``."""
            return re.sub(r'\[\d+\]', '', text)

        def clean_output_text(text: str) -> str:
            """Normalize unicode to ASCII and remove emoji."""
            return clean(
                text=text, fix_unicode=True,
                to_ascii=True, no_emoji=True,
                lang="en", no_line_breaks=False,
                keep_two_line_breaks=True,
            )

        article = Article(article_url)
        article.download()
        article.parse()
        nltk.download("punkt")
        # NOTE(review): the results of nlp() are not read in this function;
        # the call is kept so failures surface the same way as before.
        article.nlp()

        st.markdown("Article ", unsafe_allow_html=True)
        st.text("")
        st.text("")

        st.markdown(
            f"""
 {article.title}
 """,
            unsafe_allow_html=True,
        )

        article_publishdate = article.publish_date
        if article_publishdate is not None:
            st.text("published on - " + str(article_publishdate))

        article_authors = article.authors
        authors_line = ", ".join(map(str, article_authors))
        if article_authors:
            st.write(authors_line)

        cleaned_article_text = remove_bracketed_numbers(get_top_paragraphs(article.text))
        st.session_state.clean_text = cleaned_article_text

        # Cleaned once and reused for display, copy and summarization.
        display_text = clean_output_text(cleaned_article_text)
        st.write(display_text)
        st.text("")
        st.text("")

        Article_filename = f"{article.title}.doc"
        Article_text_format = f"""
 \n\n\n
{article.title}
published on - {article_publishdate}
Authors - {authors_line}
 \n\n\n
{cleaned_article_text}
 """

        Copy_download_button(
            article_text=display_text,
            article_format=Article_text_format,
            article_file_name=Article_filename,
        )
        st.text("")

        if not summary_on:
            return

        st.markdown("Article Summary ", unsafe_allow_html=True)
        with st.spinner("Generating Summary..."):
            summarized_article_text = Hugingface_summarization_modal(
                summary_text=display_text,
                modal_name=Summarizer_modal,
                maximum_length=max_length_article,
            )

        # The summarizer reports its own failure and returns None; stop here
        # rather than failing on None below and showing the 404 animation.
        if summarized_article_text is None:
            return

        st.write(summarized_article_text)
        st.text("")
        st.text("")

        summary_format = f"""

\n\n
{article.title}
\n\n\n
{summarized_article_text}
"""
        Copy_download_button(
            article_text=summarized_article_text,
            article_file_name=f"{article.title}-summary.doc",
            article_format=summary_format,
        )

        # Table describing the summarization run.
        summarization_details = {
            "Summarization Details": ["Modal Name", "Text Length", "Summary Length", "Max Tokens"],
            "Output": [
                f"{Summarizer_modal}",
                f"Length - {len(cleaned_article_text.split())}",
                f"Length - {len(summarized_article_text.split())}",
                f"Tokens Used - {max_length_article}",
            ],
        }
        summarization_details_df = pd.DataFrame(
            data=summarization_details,
            index=["Hugingface Modal", "No. words", "No. Words", "Max Length"],
        )

        st.text("")
        st.text("")
        st.text("")
        st.dataframe(summarization_details_df, use_container_width=True)

    except Exception as err:
        # Error page: two animations side by side, then the message.
        Error_404_col, page_not_found_col = st.columns(2)

        with Error_404_col:
            _render_error_animation("lottie_animations/error-404.json", 315, "404 error")

        with page_not_found_col:
            _render_error_animation("lottie_animations/page-not-found.json", 265, "page not found")

        st.warning(f"Something went wrong...\n\n{err}", icon="⚠️")
-
def article_summarizer(summary_length):
    """Write ``summary_length`` to the page with ``st.write``."""
    # NOTE(review): no call to this function is visible in this part of the
    # file (only a commented-out one) — confirm before removing.
    st.write(summary_length)
-
-
def check_url_exists(url):
    """Report whether ``url`` answers a HEAD request successfully.

    Args:
        url: The address to probe; redirects are followed.

    Returns:
        ``True`` when the final status code is below 400, ``False`` for an
        error status or any request failure (connection error, timeout,
        malformed URL).
    """
    try:
        # Without a timeout a silent server would block the app indefinitely.
        response = requests.head(url, allow_redirects=True, timeout=10)
    except requests.exceptions.RequestException:
        return False
    return response.status_code < 400
-
-
- ########### link classified article
def link_classified(text):
    """Scrape and display the article behind a URL typed by the user.

    Args:
        text: The URL; a missing scheme (e.g. ``www.example.com``) is
            completed with ``https://``.
    """
    try:
        article_url_link = str(text).strip()

        # Inputs classified as URLs may lack a scheme, which the HEAD request
        # in check_url_exists cannot handle.
        if not article_url_link.lower().startswith(("http://", "https://")):
            article_url_link = f"https://{article_url_link}"

        article_scraper(article_url_link)
        st.text("")
        st.text("")

        if check_url_exists(article_url_link):
            st.link_button(label="Visit Article", url=article_url_link)
        else:
            st.warning("Url does not exist...", icon="⚠️")

        st.text("")
        st.text("")
        st.text("")
        st.markdown("Created by Nishant Maity ", unsafe_allow_html=True)

    except Exception as err:
        st.warning(f"Something went wrong...\n\n{err}", icon="⚠️")
-
-
-
- ####$ text classified article
def text_classified(text):
    """Scrape and display the English Wikipedia article for a search term.

    Args:
        text: The topic typed by the user; spaces become underscores and the
            result is percent-encoded before being placed in the URL.
    """
    from urllib.parse import quote

    try:
        # Encode so characters such as "?", "#" or "&" in the topic cannot
        # change the structure of the URL.
        url_text = quote(text.strip().replace(" ", "_"))
        article_url = f"https://en.wikipedia.org/wiki/{url_text}"

        article_scraper(article_url)
        st.text("")
        st.text("")

        if check_url_exists(article_url):
            st.link_button(label="Visit Article", url=article_url)
        else:
            st.warning("Url does not exist...", icon="⚠️")

        st.text("")
        st.text("")
        st.text("")
        st.markdown("Created by Nishant Maity ", unsafe_allow_html=True)

    except Exception as e:
        # st.warning takes one body argument; format the error into it.
        st.warning(f"Something went wrong...\n\n{e}", icon="⚠️")
-
-
-
-############################################################################################
-
- ### j query animation
# Idle state: show the particle animation until an article is requested.
if not Generate_btn or Text_input.strip() == "":

    try:
        def particle(Js_file):
            """Embed the HTML/JS animation stored in ``Js_file``."""
            with open(Js_file, encoding="utf-8") as f:
                component.html(f.read(), height=420)

        if __name__ == "__main__":
            particle("animation/particles.html")

    except Exception as e:
        # st.error takes one body argument; format the error into it.
        st.error(f"Something went wrong...\n\n{e}")
-
# Button pressed with non-empty input: decide between URL and topic search.
if Generate_btn and Text_input.strip() != "":
    st.text("")
    st.text("")

    def classify_input(text, model):
        """Route ``text`` to the URL scraper or to the Wikipedia scraper.

        The regular expression is tried first; the trained classifier is
        consulted only when it does not match. A missing model (training
        failed) means the input is treated as a search term.
        """
        try:
            if is_url(text):
                link_classified(text)
            elif model is not None and model.predict([text])[0] == 1:
                link_classified(text)
            else:
                text_classified(text)
        except Exception as e:
            # st.error takes one body argument; format the error into it.
            st.error(f"Error...\n\n{e}", icon="⚠️")

    with st.spinner("Generating Article..."):
        if __name__ == "__main__":
            model = train_model()
            classify_input(Text_input, model)
-
-
-
-####################################################################################################
-
-
-################################# Text summarizer
-
-
if Main_menu == "Text Summarizer":

    # Centre column flanked by two empty padding columns.
    blank_text_sum1, text_summarizer_col, blank_text_sum2 = st.columns([2, 8, 2], gap="small")

    with text_summarizer_col:
        # Page title.
        st.text("")
        text_summarizer_Title = colored_header(
            label="Text Summarizer 📄",
            color_name="violet-70",
            description="enter or paste text hear",
        )

        placeholder_text = """write or paste your text hear
paragraph length should be greater then 30 words
to generate output tap on screen or press ctrl+enter
 """

        # Input box.
        text_summarizer_input = st.text_area(
            label="Enter Text Hear",
            placeholder=placeholder_text,
            height=340,
            key="text summarizer",
        )
        Modal_Level(Summarizer_modal)

        # Fewest whitespace-separated words accepted for summarization.
        min_summary_words = 20

        if text_summarizer_input.strip() == "":
            # Nothing typed yet: show the writing animation.
            try:
                write_hear_animation = insert_lottie_animation("lottie_animations/write-hear.json")
                st_lottie(
                    animation_source=write_hear_animation,
                    speed=1,
                    reverse=False, loop=True,
                    quality="medium",
                    height=165,
                    width=240,
                    key="write hear",
                )
            except Exception as err:
                st.warning(f"something went wrong...\n\n{err}", icon="⚠️")

        elif len(text_summarizer_input.split()) < min_summary_words:
            # The message used to say 35 words while the check used 20.
            st.warning(
                f"paragraph should have at least {min_summary_words} words",
                icon="✏️",
            )

        else:

            def word_token_maxvalue(text: str) -> int:
                """Return the number of NLTK word tokens in ``text``."""
                return len(word_tokenize(text))

            @st.cache_data
            def random_value_text(text: str) -> int:
                """Pick a random default length between 10 and the token count."""
                return int(np.random.randint(10, word_token_maxvalue(text)))

            def clean_data_for_summarization(text: str) -> str:
                """Normalize unicode to ASCII and remove emoji."""
                return clean(
                    text=text, fix_unicode=True,
                    to_ascii=True, no_emoji=True,
                    lang="en", no_line_breaks=False,
                    keep_two_line_breaks=True,
                )

            text_Max_length = st.slider(
                label="Max length",
                min_value=10,
                max_value=word_token_maxvalue(text_summarizer_input),
                key="text summarizer max length",
                step=1, value=random_value_text(text_summarizer_input),
            )

            Generate_text_summary = st.button(
                label="Generate summary", key="text summary"
            )

            try:
                # Animated spinner shown while the model runs.
                writing_loading_animation = insert_lottie_animation("lottie_animations/writing-loading.json")
                summary_generating_animation = st_lottie_spinner(
                    animation_source=writing_loading_animation,
                    speed=2,
                    reverse=False, loop=True,
                    quality="medium",
                    height=165,
                    width=240,
                    key="writing generating",
                )
            except Exception as err:
                st.warning(f"something went wrong...\n\n{err}", icon="⚠️")
                # Fall back to the built-in spinner so the name is always
                # bound when the button is pressed.
                summary_generating_animation = st.spinner("Generating Summary...")

            if Generate_text_summary and __name__ == "__main__":

                original_text = clean_data_for_summarization(text_summarizer_input)

                with summary_generating_animation:
                    Text_Summary_output = Hugingface_summarization_modal(
                        summary_text=original_text,
                        modal_name=Summarizer_modal,
                        maximum_length=text_Max_length,
                    )

                # None means the summarizer already reported a failure.
                if Text_Summary_output is not None:
                    # Summary with its copy control.
                    st.text("")
                    st.text("")
                    st.markdown("Generated Summary ", unsafe_allow_html=True)
                    st.text("")
                    st.write(Text_Summary_output)
                    st.text("")

                    copy_text(Text_Summary_output)
                    st.text("")
                    st.text("")

                    # Original text with its copy control.
                    st.markdown("Original Text ", unsafe_allow_html=True)
                    st.text("")
                    st.write(original_text)
                    st.text("")
                    copy_text(original_text)

                    st.text("")
                    st.text("")
                    st.text("")

                    # Table describing the summarization run.
                    text_summarization_details = {
                        "Summarization Details": ["Modal Name", "Text Length", "Summary Length", "Max Tokens"],
                        "Output": [
                            f"{Summarizer_modal}",
                            f"Length - {len(text_summarizer_input.split())}",
                            f"Length - {len(Text_Summary_output.split())}",
                            f"Tokens Used - {text_Max_length}",
                        ],
                    }

                    summarization_details_df = pd.DataFrame(
                        data=text_summarization_details,
                        index=["Hugingface Modal", "No. words", "No. Words", "Max Length"],
                    )

                    st.text("")
                    st.text("")
                    st.text("")
                    st.dataframe(summarization_details_df, use_container_width=True)
                    st.text("")
                    st.text("")
                    st.text("")
                    st.markdown("Created by Nishant Maity ", unsafe_allow_html=True)
-
-
-
-##############################################################################################################
-
-############################## pdf summarizer
-
-
-#### pdf and text summarizer functions
-
-
-#### displaying uploaded pdf file
def display_pdf_file(uploaded_file):
    """Save an uploaded PDF under ``data/`` and show it on the page.

    Args:
        uploaded_file: The uploaded file object; its ``name`` and
            ``getbuffer()`` are used.
    """
    try:
        pdf_bytes = bytes(uploaded_file.getbuffer())

        # Keep only the final path component so the upload name cannot
        # point outside the data directory.
        safe_name = os.path.basename(uploaded_file.name)
        # The directory may not exist on a fresh checkout.
        os.makedirs("data", exist_ok=True)
        with open(os.path.join("data", safe_name), "wb") as f:
            f.write(pdf_bytes)
        st.toast("file uploaded: {}".format(uploaded_file.name))

        # Encode from memory; re-reading the file just written is unnecessary.
        base64_pdf = base64.b64encode(pdf_bytes).decode("utf-8")

        # NOTE(review): the embed markup is not visible in the source (the
        # encoded PDF was computed but never used). The iframe below is the
        # common data-URI idiom; confirm the intended size and attributes.
        pdf_display = f"""
<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="800" type="application/pdf"></iframe>
 """

        st.markdown(pdf_display, unsafe_allow_html=True)
    except Exception as e:
        # st.warning takes one body argument; format the error into it.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
-
-
-#### Function to extract text from a specific page using pdfminer
def extract_text_pdfminer(pdf_file, page_number):
    """Extract the text of one PDF page, one output line per text line.

    Args:
        pdf_file: Path or binary file object of the PDF.
        page_number: 1-based page number.

    Returns:
        The page text with every line stripped and newline-terminated. An
        empty string is returned, after showing a warning, when the page
        does not exist or extraction fails, so callers always get a string.
    """
    try:
        # An uploaded file may already have been read by another parser.
        if hasattr(pdf_file, "seek"):
            pdf_file.seek(0)

        lines = []
        page_found = False
        # page_numbers is zero-based; only the requested page is analysed.
        for page_layout in extract_pages(pdf_file, page_numbers=[page_number - 1]):
            page_found = True
            for element in page_layout:
                if not isinstance(element, LTTextContainer):
                    continue
                for text_line in element:
                    if isinstance(text_line, LTTextLine):
                        # get_text() keeps the spacing annotations that a
                        # characters-only join would drop.
                        lines.append(text_line.get_text().strip())

        if not page_found:
            st.warning("Invalid page number.", icon="⚠️")
            return ""

        return "".join(f"{line}\n" for line in lines)
    except Exception as e:
        # st.warning takes one body argument; format the error into it.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return ""
-
-
-###############################################
-
-
-##### clean text for summmarization task
def uploaded_Clean_Text_Summarization(clean_text: str) -> str:
    """Prepare uploaded text for summarization.

    Args:
        clean_text: The raw text.

    Returns:
        The text without the characters ``| ` ~ ^ $ < >``, normalized to
        ASCII and without emoji. If cleaning fails a warning is shown and an
        empty string is returned.
    """
    try:
        cleaned_paragraph = re.sub(r'[|`~^$<>]', '', clean_text)

        return clean(
            text=cleaned_paragraph, fix_unicode=True,
            to_ascii=True, no_emoji=True,
            lang="en", no_line_breaks=False,
            keep_two_line_breaks=True,
        )

    except Exception as e:
        # The old code fell through to return a variable that was never
        # assigned on this path, raising UnboundLocalError.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return ""
-
-
-### convert paragraph into tokens
def generate_text_para_tokens(text_para: str) -> int:
    """Count the NLTK word tokens of a paragraph after cleaning.

    Args:
        text_para: The raw text.

    Returns:
        The number of tokens in the cleaned text, or ``0`` (after showing a
        warning) when cleaning or tokenizing fails.
    """
    try:
        cleaned_paragraph = re.sub(r'[|`~#^$<>]', '', text_para)

        clean_para = clean(
            text=cleaned_paragraph, fix_unicode=True,
            to_ascii=True, no_emoji=True,
            lang="en", no_line_breaks=False,
            keep_two_line_breaks=True,
        )

        # The tokenizer already returns a list; no copy or array is needed.
        return len(word_tokenize(clean_para))

    except Exception as e:
        # Return an int so slider bounds built from this value stay numeric.
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return 0
-
-
-
- ### generates random value for slider
@st.cache_data
def random_text_para_value(para: str) -> int:
    """Pick a default summary length for the "Max Length" slider.

    Args:
        para: The text that will be summarized.

    Returns:
        A random int in ``[20, token_count)`` when the text has more than 20
        tokens. For shorter text the token count itself is returned, but
        never less than 10, which the callers use as the slider minimum.
    """
    slider_minimum = 10
    try:
        token_count = generate_text_para_tokens(para) or 0
        if token_count <= 20:
            # randint(20, n) raises for n <= 20.
            return max(token_count, slider_minimum)
        return int(np.random.randint(20, token_count))
    except Exception as e:
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return slider_minimum
-
-
-#### PDF files summarizer
def process_pdf(file):
    """Render the PDF page: a viewer tab and an extract/summarize tab.

    Args:
        file: The uploaded PDF file object.

    The first tab shows the document and basic information. The second tab
    extracts either the whole text or a single page; the page text can be
    edited, printed raw, printed cleaned, or summarized with the
    ``facebook-bart`` model.
    """

    def _spacer(count=1):
        """Insert ``count`` empty text elements as vertical spacing."""
        for _ in range(count):
            st.text("")

    def _footer():
        """Closing spacing and credit line shown after every result."""
        _spacer(3)
        st.markdown("Created by Nishant Maity ", unsafe_allow_html=True)

    def _rewind():
        """Reset the upload stream before handing it to another parser."""
        if hasattr(file, "seek"):
            file.seek(0)

    _rewind()
    reader = PdfReader(file)
    page_count = len(reader.pages)

    pdf_display_tab, pdf_summarizer_tab = st.tabs([f"Displaying {file.name}", "Pdf Summarizer"])

    # ---- tab 1: viewer and file information
    with pdf_display_tab:
        st.markdown(f"Pdf - {file.name} ", unsafe_allow_html=True)

        pdf_col, pdf_info_col = st.columns([5, 3], gap="medium")
        with pdf_col:
            with st.spinner("Displaying file..."):
                display_pdf_file(file)

        with pdf_info_col:
            st.write("Your File: {}".format(file.name))
            st.write(f"Number of pages: {str(page_count)}")
            st.markdown(insert_html("htmlfiles/pdf-summarizer-info.html"), unsafe_allow_html=True)

    # ---- tab 2: extraction and summarization
    with pdf_summarizer_tab:

        _spacer()
        st.markdown("Extract pdf text ", unsafe_allow_html=True)

        extract_by_page_all = st.toggle(
            label="Extract whole Text", key="toggle for extract text",
            value=False,
        )

        if extract_by_page_all:
            st.write("Extract whole pdf Text")

            if st.button("Extract Whole Pdf", key="whole pdf text extract"):
                _spacer(2)
                with st.spinner("Extracting pdf..."):
                    _rewind()
                    whole_pdf_text = extract_text(file)
                    st.markdown("Whole PDF Text ", unsafe_allow_html=True)
                    _spacer()
                    st.write(whole_pdf_text)
            return

        st.write("Extract by page Number")

        pdf_page_no_col, pdf_page_noinfo_col = st.columns([3, 5], gap="small")

        with pdf_page_no_col:
            # The page count read above is reused; a second reader is not
            # needed.
            Pdf_page_number_input = st.number_input(
                label="Select the page number",
                min_value=1, max_value=page_count,
                value=1, key="pdf page number", step=1,
            )

        with pdf_page_noinfo_col:
            _spacer(2)
            st.write(f"Selected page: {str(Pdf_page_number_input)}")

        Extract_page_no_button = st.button(
            label="Extract Page text",
            key="Extract button for page",
        )
        _spacer(2)

        if Extract_page_no_button:
            _rewind()
            text_pdfminer = extract_text_pdfminer(file, Pdf_page_number_input)
            # Only a string is stored, so the text area below always gets a
            # valid value.
            st.session_state['extracted_text'] = text_pdfminer if isinstance(text_pdfminer, str) else ""

        if 'extracted_text' not in st.session_state:
            return

        stored_text = st.session_state['extracted_text']
        Pdf_file_text = st.text_area(
            label=f"Text data of {Pdf_page_number_input} page",
            value=stored_text if isinstance(stored_text, str) else "",
            height=400,
        )
        # Keep the user's edits for the next rerun.
        st.session_state['extracted_text'] = Pdf_file_text

        # A slider running from 10 to the token count needs enough text.
        token_count = generate_text_para_tokens(Pdf_file_text) or 0
        if token_count <= 20:
            st.warning("Summarization Task failed\nnot enough amount of text...", icon="⚠️")
            return

        _spacer()
        Max_length_pdf_slider = st.slider(
            label="Max Length", key="Pdf summarizer slider",
            min_value=10, max_value=token_count,
            value=random_text_para_value(Pdf_file_text),
        )
        _spacer()

        upload_Pdf_summary_btn_col, upload_Pdf_print_btn_col, upload_clean_Pdf_print_btn_col, blank_Pdf_col1, blank_Pdf_col2 = st.columns(
            [4, 4, 4, 7, 3], gap="small"
        )

        with upload_Pdf_summary_btn_col:
            Generate_upload_pdf_summary_btn = st.button(
                label="Generate Summary",
                key="Generate summary of uploaded text pdf",
            )

        with upload_clean_Pdf_print_btn_col:
            Upload_clean_pdf_btn = st.button(
                label="Print Clean Text",
                key="Print clean pdf file",
            )

        with upload_Pdf_print_btn_col:
            upload_pdf_print_button = st.button(
                label="Print Uploaded Text",
                key="Print uploadded pdf",
            )

        if Upload_clean_pdf_btn:
            with st.spinner("Generating Clean Text..."):
                # Cleaned once and reused for display and copy.
                cleaned_text = uploaded_Clean_Text_Summarization(Pdf_file_text)
                _spacer(2)
                st.markdown("Clean Text ", unsafe_allow_html=True)
                _spacer()
                st.write(cleaned_text)
                _spacer()
                copy_text(cleaned_text)
                _footer()

        elif upload_pdf_print_button:
            with st.spinner("Generating Uploaded Text..."):
                _spacer(2)
                st.markdown("Uploaded Text ", unsafe_allow_html=True)
                _spacer()
                st.text(Pdf_file_text)
                _spacer()
                copy_text(Pdf_file_text)
                _footer()

        elif Generate_upload_pdf_summary_btn:
            _spacer()
            with st.spinner("Generating Summary..."):
                _spacer()
                Uploded_Pdf_file_Summary = Hugingface_summarization_modal(
                    summary_text=uploaded_Clean_Text_Summarization(Pdf_file_text),
                    maximum_length=Max_length_pdf_slider,
                    modal_name="facebook-bart",
                )

            # None means the summarizer already reported a failure.
            if Uploded_Pdf_file_Summary is not None:
                st.markdown("Summary ", unsafe_allow_html=True)
                _spacer()
                st.write(Uploded_Pdf_file_Summary)
                _spacer()
                copy_text(Uploded_Pdf_file_Summary)
                _footer()
-
-
-
-
-#################################################
-
-
-##### text file summarizer
def process_text(file):
    """Render the page for an uploaded plain-text file.

    Args:
        file: The uploaded text file object.

    The text is shown in an editable area and can be printed raw, printed
    cleaned, or summarized with the ``facebook-bart`` model.
    """

    def _spacer(count=1):
        """Insert ``count`` empty text elements as vertical spacing."""
        for _ in range(count):
            st.text("")

    def _footer():
        """Closing spacing and credit line shown after every result."""
        _spacer(3)
        st.markdown("Created by Nishant Maity ", unsafe_allow_html=True)

    # Rewind in case the stream was read before; undecodable bytes are
    # replaced rather than aborting the page.
    if hasattr(file, "seek"):
        file.seek(0)
    text_file = file.read().decode("utf-8", errors="replace")

    # Name without extension; slicing four characters off only worked for
    # three-letter extensions.
    file_stem = os.path.splitext(file.name)[0]

    _spacer()
    st.markdown("Text file ", unsafe_allow_html=True)

    # Editable view of the uploaded text.
    Uploaded_text = st.text_area(
        label=f"{file_stem} text data",
        value=text_file, key="text file data",
        height=400,
    )
    st.write(f"**{file_stem}** Edit your file press ctrl+enter")

    # Require enough text for the slider and the random default to be valid.
    token_count = generate_text_para_tokens(Uploaded_text) or 0
    if len(Uploaded_text.split()) < 20 or token_count <= 20:
        st.warning("Summarization Task failed\nnot enough amount of text...", icon="⚠️")
        return

    _spacer()
    max_text_para_length = st.slider(
        label="Max Length", min_value=10,
        max_value=token_count,
        step=1, key="paragraph length",
        value=random_text_para_value(Uploaded_text),
    )
    _spacer()

    upload_text_summary_btn_col, upload_text_print_btn_col, upload_clean_text_print_btn_col, blank_text_col1, blank_text_col2 = st.columns(
        [4, 4, 4, 7, 3], gap="small"
    )

    with upload_text_summary_btn_col:
        Generate_upload_text_summary_btn = st.button(
            label="Generate Summary",
            key="Generate summary of uploaded text",
        )

    with upload_clean_text_print_btn_col:
        Upload_clean_text_btn = st.button(
            label="Print Clean Text",
            key="Print clean text file",
        )

    with upload_text_print_btn_col:
        upload_text_print_button = st.button(
            label="Print Uploaded Text",
            key="Print uploadded text",
        )

    if Upload_clean_text_btn:
        with st.spinner("Generating Clean Text..."):
            # Cleaned once and reused for display and copy.
            cleaned_text = uploaded_Clean_Text_Summarization(Uploaded_text)
            _spacer(2)
            st.markdown("Clean Text ", unsafe_allow_html=True)
            _spacer()
            st.write(cleaned_text)
            _spacer()
            copy_text(cleaned_text)
            _footer()

    elif upload_text_print_button:
        with st.spinner("Generating Uploaded Text..."):
            _spacer(2)
            st.markdown("Uploaded Text ", unsafe_allow_html=True)
            _spacer()
            st.text(Uploaded_text)
            _spacer()
            copy_text(Uploaded_text)
            _footer()

    elif Generate_upload_text_summary_btn:
        _spacer()
        with st.spinner("Generating Summary..."):
            _spacer()
            Uploded_Text_file_Summary = Hugingface_summarization_modal(
                summary_text=uploaded_Clean_Text_Summarization(Uploaded_text),
                maximum_length=max_text_para_length,
                modal_name="facebook-bart",
            )

        # None means the summarizer already reported a failure.
        if Uploded_Text_file_Summary is not None:
            st.markdown("Summary ", unsafe_allow_html=True)
            _spacer()
            st.write(Uploded_Text_file_Summary)
            _spacer()
            copy_text(Uploded_Text_file_Summary)
            _footer()
-
-
-
-if Main_menu == "PDF Summarizer":
-
- ### blank and app columns
- Blank_pdf1 ,pdf_summarizer_col, Blank_pdf2 = st.columns([1,8,1],gap="small")
-
- with Blank_pdf1:
- pass
- with Blank_pdf2:
- pass
-
- with pdf_summarizer_col:
- st.text("")
- st.header("PDF Summarizer") ### app heading
-
- ### File uploader function
- app_file_upload = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])
-
- if app_file_upload is not None:
-
- ### if pdf file
- if app_file_upload.type == "application/pdf":
- if __name__=="__main__":
- process_pdf(app_file_upload)
-
- #### if text file
- elif app_file_upload.type == "text/plain":
- if __name__=="__main__":
- process_text(app_file_upload)
-
- else:
- st.info("Upload your pdf, text file")
-
-
- #### app info
-if Main_menu == "App Info":
- Blank_app_info1, App_info_col, Blank_app_info2 = st.columns([2,8,2])
-
- #### blank columns
- with Blank_app_info1:
- pass
- with Blank_app_info2:
- pass
-
- ### app info column
- with App_info_col:
- st.text("")
- st.header("App Info")
- st.text("")
-
- if __name__=="__main__":
- st.markdown(insert_html("htmlfiles/app-info.html"),
- unsafe_allow_html=True
- )
-
+import streamlit as st ### importing liberaries
+from streamlit_extras.colored_header import colored_header
+from streamlit_option_menu import option_menu
+import streamlit.components.v1 as component
+from streamlit_lottie import st_lottie, st_lottie_spinner
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.pipeline import make_pipeline
+from transformers import pipeline
+from transformers import AutoTokenizer , AutoModelForSeq2SeqLM
+from newspaper import Article
+import nltk
+import nltk.downloader
+nltk.download('punkt_tab')
+from nltk.tokenize import word_tokenize
+from cleantext import clean
+from PyPDF2 import PdfReader
+import pdfminer
+from pdfminer.high_level import extract_text
+from pdfminer.high_level import extract_pages
+from pdfminer.layout import LTTextContainer, LTChar, LTTextLine
+import requests
+import json
+import numpy as np
+import pandas as pd
+import random
+import base64
+import lxml
+import lxml_html_clean
+import re
+import os
+
+
+###### main app functions
+
+### insert external css
+def insert_css(css_file:str):
+ with open(css_file) as f:
+ st.markdown(f"",unsafe_allow_html=True)
+
+### insert external html file
+def insert_html(html_file):
+ with open(html_file) as f:
+ return f.read()
+
+### insert lottie animation json files
+def insert_lottie_animation(animation_file:str):
+ with open(animation_file, "r") as f:
+ return json.load(f)
+
+### app tutorial video function
+@st.dialog("App Tutorial")
+def watch_tutorial():
+ st.subheader("GenAi Summarizer🤖")
+ video_file = open("app_tutorial.mp4", "rb")
+ video_bytes = video_file.read()
+ st.text("")
+ st.video(
+ data=video_bytes,format="video/mp4",
+ loop=True,autoplay=True
+ )
+
+
+def download_text(text, filename):
+ """
+ download article text
+ in document format
+ """
+ #### Convert string to bytes
+ b64 = base64.b64encode(text.encode()).decode()
+
+ href = f"""
+
+ Download
+
+ """
+
+ st.markdown(href, unsafe_allow_html=True)
+ if __name__=="__main__":
+ insert_css("cssfiles/download-article.css")
+
+
+def copy_text(text):
+ html_code = f"""
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {text}
+
+
+
+
+
+ """
+
+ component.html(html_code,height=28)
+
+
+### copy and download button
+def Copy_download_button(article_text,article_format,article_file_name):
+ try:
+ ### column for copy and download article
+ Copy_btn_col,download_btn_col, blank_col_copy1, blank_col_copy2= st.columns([1,3,5,5],gap="small")
+
+ with blank_col_copy1:
+ st.text("")
+ with blank_col_copy1:
+ st.text("")
+
+ with Copy_btn_col:
+ copy_text(article_text)
+
+ with download_btn_col:
+ download_text(text=article_format,filename=article_file_name)
+ except Exception as e:
+ st.warning("Something went wrong...",e,icon="⚠️")
+
+
+### setting page layout
+st.set_page_config(
+ page_title="GenAi Summarizer",
+ page_icon="🤗",
+ initial_sidebar_state="collapsed",
+ layout="wide"
+)
+
+
+#### app settings css
+if __name__=="__main__":
+ insert_css("cssfiles/app.css")
+
+
+### huging face modals
+Hugingface_modals = {
+ "google-pegasus":"google/pegasus-xsum",
+ "facebook-bart":"facebook/bart-large-cnn",
+ "t5-base":"t5-base"
+}
+
+
+### summarization modal
+def Hugingface_summarization_modal(summary_text,modal_name,maximum_length):
+ """
+ it is an text summarization modal
+ it use hugingface modals for summarization task.
+ it generates summarized text output
+ """
+ def summarization_modal_name(modal)->str:
+ if modal == "google-pegasus":
+ return "google/pegasus-xsum"
+ elif modal == "facebook-bart":
+ return "facebook/bart-large-cnn"
+ elif modal == "t5-base":
+ return "t5-base"
+ try:
+ use_modal = summarization_modal_name(modal_name) ### modal name
+
+ auto_tokenizer = AutoTokenizer.from_pretrained(use_modal) ### using autokenizer for pretrained modal
+ auto_modal = AutoModelForSeq2SeqLM.from_pretrained(use_modal)
+
+ ### creating pipeline
+ summarizer = pipeline("summarization",model=auto_modal,tokenizer=auto_tokenizer)
+
+ summarizer_text = summary_text
+
+ summary_generate = summarizer( ### summarizer
+ summarizer_text,max_length=maximum_length+20,
+ min_length=maximum_length,
+ do_sample=False
+ )
+
+ return summary_generate[0]['summary_text']
+
+ except Exception as e:
+ st.warning("Something went wrong...\n\n",e,icon="⚠️")
+
+
+
+
+### displaying modals
+@st.cache_data
+def Modal_Level(modal_text):
+ if modal_text == "google-pegasus":
+ st.markdown(
+ f"""
+
+
+ Maodal-
+
+ google/pegasus-xsum
+
+ """,unsafe_allow_html=True
+ )
+
+ elif modal_text == "facebook-bart":
+ st.markdown(
+ f"""
+
+ Maodal-
+
+ facebook/bart-large-cnn
+
+ """,unsafe_allow_html=True
+ )
+
+ elif modal_text == "t5-base":
+ st.markdown(
+ f"""
+
+ Maodal-
+
+ t5-base
+
+ """,unsafe_allow_html=True
+ )
+ if __name__=="__main__":
+ insert_css("cssfiles/modal.css")
+
+
+
+#### creating sidebar
+app_sidebar = st.sidebar
+
+with app_sidebar:
+ st.text("")
+ st.subheader("GenAi Summarizer🤖")
+ st.write("Developer: **Nishant Maity**")
+ st.text("")
+ st.text("")
+
+ ### creating menu bar
+ Main_menu = option_menu(
+ menu_title="",
+ options=["Article Summarizer","Text Summarizer","PDF Summarizer","App Info"],
+ icons=["chat-dots","card-heading","file-earmark-pdf","person-circle"],
+ default_index=0,
+ key="Menu Bar"
+ )
+ st.text("")
+
+ ### select modal for text and article summarizer
+ if Main_menu == "Article Summarizer" or Main_menu == "Text Summarizer":
+
+ Summarizer_modal = st.selectbox(
+ label="Select Modal",
+ options=np.array(list(Hugingface_modals.keys())),
+ index=1,
+ key="Modals"
+ )
+
+#### selecting number or paragraph for article summarizer
+if Main_menu == "Article Summarizer":
+ with app_sidebar:
+ st.text("")
+ st.text("")
+
+ Number_of_article_paragraph = st.slider(
+ label="Number of paragraph",
+ min_value=1,max_value=10,
+ step=1,value=2,
+ key="Number of paragraph"
+ )
+
+with app_sidebar:
+ st.button(
+ label="Watch App Tutorial",
+ use_container_width=True,
+ on_click=watch_tutorial
+ )
+
+
+##### article summarizer functions
+
+##### naive bayes text classification function
+
+def is_url(text):
+ url_pattern = re.compile(
+ r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+|(?:www\.)[^\s]+')
+ return bool(url_pattern.match(text))
+
+
+# Train a model for text vs URL classification
+def train_model():
+ """
+ this function predict the given input
+ is a simple text or url,link
+ and generate output.
+ """
+ #### dataset (normal text and URLs)
+ try:
+ data = [
+ ('This is a normal sentence.', 'text'),
+ ('www.google.com', 'url'),
+ ('Check out this website', 'text'),
+ ('https://www.example.com', 'url'),
+ ('Machine learning is fun', 'text'),
+ ('http://openai.com', 'url'),
+ ('Python is a great language', 'text'),
+ ]
+ texts = [d[0] for d in data]
+ labels = [1 if d[1] == 'url' else 0 for d in data] ## 1 for url, 0 for text
+
+ ##### modal training
+ X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
+
+ model = make_pipeline(CountVectorizer(), MultinomialNB())
+
+ model.fit(X_train, y_train) #### Train the model
+
+ model.score(X_train, y_train)
+ model.score(X_test, y_test)
+
+ return model
+
+ except Exception as e:
+ st.error("Error...\n\n",e,icon="⚠️")
+
+
+
+############################### article summarizer
+
+
+if Main_menu == "Article Summarizer":
+
+ blank_article1, article_column, blank_article2 = st.columns([2,8,2],gap="small")
+
+ with blank_article1: ### blank space
+ pass
+ with blank_article2: ### blank space
+ pass
+
+ #### main app column
+ with article_column:
+
+ #### app title
+ st.text("")
+ App_Title = colored_header(
+ label="Web Article Summarizer 📑",
+ color_name="blue-green-70",
+ description="Search or paste url"
+ )
+
+ Text_input = st.text_input(
+ label="Search or paste url",
+ placeholder="machine learning, java url- https://www.example.com"
+ )
+
+ ### max slider value
+ def max_length_slider_value(max_length)->int:
+ if max_length == 1:
+ return 90
+ elif max_length == 2:
+ return 150
+ elif max_length == 3:
+ return 250
+ elif max_length == 4:
+ return 380
+ elif max_length == 5:
+ return 470
+ elif max_length == 6:
+ return 600
+ elif max_length == 7:
+ return 750
+ elif max_length == 8:
+ return 900
+ elif max_length == 9:
+ return 1200
+ elif max_length == 10:
+ return 1360
+
+ @st.cache_data
+ def Default_max_length(default_value):
+ if default_value == 1:
+ random_value = np.random.randint(30,65,6)
+ return random.choice(random_value)
+
+ elif default_value == 2:
+ random_value = np.random.randint(50,130,6)
+ return random.choice(random_value)
+
+ elif default_value == 3:
+ random_value = np.random.randint(70,210,6)
+ return random.choice(random_value)
+
+ elif default_value == 4:
+ random_value = np.random.randint(140,310,6)
+ return random.choice(random_value)
+
+ elif default_value == 5:
+ random_value = np.random.randint(200,390,6)
+ return random.choice(random_value)
+
+ elif default_value == 6:
+ random_value = np.random.randint(230,490,6)
+ return random.choice(random_value)
+
+ elif default_value == 7:
+ random_value = np.random.randint(280,590,6)
+ return random.choice(random_value)
+
+ elif default_value == 8:
+ random_value = np.random.randint(350,750,6)
+ return random.choice(random_value)
+
+ elif default_value == 9:
+ random_value = np.random.randint(450,1050,6)
+ return random.choice(random_value)
+
+ elif default_value == 10:
+ random_value = np.random.randint(560,1100,6)
+ return random.choice(random_value)
+
+
+
+
+ Button_column, Toggle_summary_btn, Modal_display = st.columns([1,1,3],gap="small")
+
+
+ # article_summarizer(max_length)
+ with Button_column:
+ ### generate article button
+ Generate_btn = st.button(label="Generate Article")
+
+ with Toggle_summary_btn:
+ ### if on then it generates summary
+ summary_on = st.toggle(
+ label="Summarizer",
+ value=False,
+ key="Summarizer on off"
+ )
+
+ if summary_on:
+ st.toast(body="Summarizer Mode on",icon="📑")
+ else:
+ st.toast(body="Scraping Mode",icon="📰")
+
+ with Modal_display:
+
+ if summary_on:
+ Modal_Level(Summarizer_modal)
+ else:
+ pass
+ if summary_on:
+ max_length_article = st.slider(
+ label="max length",
+ min_value=10,max_value=max_length_slider_value(Number_of_article_paragraph),
+ key="max length",value=Default_max_length(Number_of_article_paragraph)
+ )
+
+
+################################################################################################
+
+
+ ### article scraper function
+ def article_scraper(article_url):
+ """
+ this function is used to scrap
+ web articles and it provide
+ text in the clean format
+ """
+ try:
+ article = Article(article_url) ### article object
+ article.download()
+ article.parse()
+ nltk.download("punkt")
+ article.nlp()
+
+ st.markdown("Article ",unsafe_allow_html=True)
+ st.text("")
+ st.text("")
+
+ st.markdown( ### article title
+ f"""
+ {article.title}
+ """,unsafe_allow_html=True
+ )
+
+ article_publishdate = article.publish_date ### article publish date
+ if article_publishdate == None:
+ pass
+ else:
+ st.text("published on - "+str(article_publishdate))
+
+ article_authors = article.authors #### article authors
+ if len(article_authors) == 0:
+ pass
+ else:
+ autho_name_print = ", ".join(map(str, article_authors))
+ st.write(autho_name_print)
+
+
+ ### generating article summary
+ def get_top_paragraphs(text, num_paragraphs=Number_of_article_paragraph):
+ """
+ this function gives
+ top 1 - 10 paragraph of the
+ scrap data
+ """
+ paragraphs = text.split('\n\n')
+
+ valid_paragraphs = [p.strip() for p in paragraphs if len(p.strip().split()) > 12]
+ top_paragraphs = valid_paragraphs[:num_paragraphs]
+ return '\n\n'.join(top_paragraphs)
+
+
+ article_summary = article.text
+
+ def remove_bracketed_numbers(text)->str:
+ pattern = r'\[\d+\]'
+ cleaned_text = re.sub(pattern, '', text)
+ return cleaned_text
+
+
+ cleaned_article_text = remove_bracketed_numbers(get_top_paragraphs(article_summary))
+
+ if "clean_text" not in st.session_state:
+ st.session_state.clean_text = ""
+
+ st.session_state.clean_text = cleaned_article_text
+
+ def clean_output_text(text:str)->str:
+ """
+ it gives clean text without emojies,
+ no ascii values english text
+ """
+ clean_text = clean(
+ text=text,fix_unicode=True,
+ to_ascii=True,no_emoji=True,
+ lang="en",no_line_breaks=False,
+ keep_two_line_breaks=True
+ )
+ return clean_text
+ ### Print the cleaned text
+ st.write(clean_output_text(st.session_state.clean_text))
+ st.text("")
+ st.text("")
+
+
+ ### copy download button
+ Article_filename = f"{article.title}.doc"
+
+ Article_text_format = f"""
+ \n\n\n
+{str(article.title)}
+published on - {str(article_publishdate)}
+Authors - {", ".join(map(str, article_authors))}
+ \n\n\n
+{str(cleaned_article_text)}
+ """
+
+
+ if __name__=="__main__":
+ Copy_download_button(
+ article_text=clean_output_text(cleaned_article_text),
+ article_format=Article_text_format,
+ article_file_name=Article_filename
+ )
+
+ st.text("")
+
+ if summary_on:
+ st.markdown("Article Summary ",unsafe_allow_html=True)
+
+ #### summarization modal
+
+ with st.spinner("Generating Summary..."):
+
+
+ if __name__=="__main__":
+ summarized_article_text = Hugingface_summarization_modal(
+ summary_text=clean_output_text(cleaned_article_text),
+ modal_name=Summarizer_modal,
+ maximum_length=max_length_article
+ )
+ #### clean ai generated paragraph
+
+
+ st.write(summarized_article_text)
+ st.text("")
+ st.text("")
+
+ summary_format = f"""
+
+\n\n
+{article.title}
+\n\n\n
+{summarized_article_text}
+"""
+ #### copy or download summary button
+ if __name__=="__main__":
+ Copy_download_button(
+ article_text=summarized_article_text,
+ article_file_name=f"{article.title}-summary.doc",
+ article_format=summary_format
+ )
+
+ if summary_on:
+
+ ### summarization details
+ summarization_details = {
+ "Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
+ "Output":[
+ f"{Summarizer_modal}",
+ f"Length - {len(cleaned_article_text.split())}",
+ f"Length - {len(summarized_article_text.split())}",
+ f"Tokens Used - {max_length_article}"
+ ]
+ }
+
+ summarization_details_df = pd.DataFrame(
+ data=summarization_details,
+ index=["Hugingface Modal","No. words","No. Words","Max Length"]
+ )
+
+ st.text("")
+ st.text("")
+ st.text("")
+ st.dataframe(summarization_details_df,use_container_width=True)
+
+
+
+ except Exception as err:
+ ### 404 error animation
+
+ Error_404_col, page_not_found_col = st.columns(2)
+
+ with Error_404_col:
+
+ try:
+ Error_404 = insert_lottie_animation("lottie_animations/error-404.json")
+ st_lottie(
+ animation_source=Error_404,
+ speed=1,
+ reverse=False,loop=True,
+ quality="high",
+ height=315,
+ width=400,
+ key="404 error"
+ )
+ except Exception as err:
+ st.warning("something went wrong...",err,icon="⚠️")
+
+ with page_not_found_col:
+
+ try:
+ page_not_found = insert_lottie_animation("lottie_animations/page-not-found.json")
+ st_lottie(
+ animation_source=page_not_found,
+ speed=1,
+ reverse=False,loop=True,
+ quality="high",
+ height=265,
+ width=400,
+ key="page not found"
+ )
+ except Exception as err:
+ st.warning("something went wrong...",err,icon="⚠️")
+
+ st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
+
+ def article_summarizer(summary_length):
+ st.write(summary_length)
+
+
+ def check_url_exists(url):
+ try:
+ response = requests.head(url, allow_redirects=True)
+ if response.status_code < 400:
+ return True
+ else:
+ return False
+ except requests.exceptions.RequestException as e:
+ # Handle any exception (e.g., connection error, timeout)
+ return False
+
+
+ ########### link classified article
+ def link_classified(text):
+ """
+ it use url or link to scrap articles
+ provide author name, publish date, summary of
+ article
+ """
+ try:
+ url_text = text
+ article_url_link = f"{url_text}" ### url to scrap
+ if __name__=="__main__":
+ article_scraper(article_url_link)
+ st.text("")
+ st.text("")
+
+ if check_url_exists(article_url_link):
+ st.link_button(label="Visit Article",url=(article_url_link))
+ else:
+ st.warning("Url does not exist...",icon="⚠️")
+
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+ except Exception as err:
+ st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
+
+
+
+ ####$ text classified article
+ def text_classified(text):
+ """
+ it use wikipedia to scrap articles
+ provide author name, publish date, summary of
+ article
+ """
+ try:
+ url_text = text.replace(" ","_")
+ article_url = f"https://en.wikipedia.org/wiki/{url_text}" ### url to scrap
+ if __name__=="__main__":
+ article_scraper(article_url)
+ st.text("")
+ st.text("")
+
+ if check_url_exists(article_url):
+ st.link_button(label="Visit Article",url=article_url)
+ else:
+ st.warning("Url does not exist...",icon="⚠️")
+
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+
+ except Exception as e:
+ st.warning("Something went wrong...",e,icon="⚠️")
+
+
+
+############################################################################################
+
+ ### j query animation
+ if not Generate_btn or Text_input.strip() == "":
+
+ try:
+ def particle(Js_file):
+ with open(Js_file) as f:
+ component.html(f"{f.read()}", height=420)
+
+ if __name__=="__main__":
+ particle("animation/particles.html")
+
+ except Exception as e:
+ st.error("Something went wrong...\n\n",e)
+
+ if Generate_btn:
+ if Text_input.strip() != "":
+ st.text("")
+ st.text("")
+
+ ### Function to classify the input text
+ def classify_input(text, model):
+ try:
+ if is_url(text):
+ link_classified(text)
+ else:
+ #### If it's not detected as a URL
+ prediction = model.predict([text])[0]
+ if prediction == 1:
+ link_classified(Text_input)
+ else:
+ text_classified(Text_input)
+ except Exception as e:
+ st.error("Error...\n\n",e,icon="⚠️")
+
+ with st.spinner("Generating Article..."):
+ if __name__=="__main__":
+ model = train_model()
+ classify_input(Text_input, model)
+
+
+
+####################################################################################################
+
+
+################################# Text summarizer
+
+
+if Main_menu == "Text Summarizer":
+
+ blank_text_sum1, text_summarizer_col, blank_text_sum2 = st.columns([2,8,2],gap="small")
+
+ ### blank columns
+ with blank_text_sum1:
+ pass
+ with blank_text_sum2:
+ pass
+
+ ### text summarizer app column
+
+ with text_summarizer_col:
+ #### app title
+ st.text("")
+ text_summarizer_Title = colored_header(
+ label="Text Summarizer 📄",
+ color_name="violet-70",
+ description="enter or paste text hear"
+ )
+
+ placeholder_text = """write or paste your text hear
+paragraph length should be greater then 30 words
+to generate output tap on screen or press ctrl+enter
+ """
+
+ ### input box
+ text_summarizer_input = st.text_area(
+ label="Enter Text Hear",
+ placeholder=placeholder_text,
+ height=340,
+ key="text summarizer"
+ )
+ Modal_Level(Summarizer_modal)
+
+ if text_summarizer_input.strip() == "":
+
+ try:
+ #### writing animation
+ write_hear_animation = insert_lottie_animation("lottie_animations/write-hear.json")
+ st_lottie(
+ animation_source=write_hear_animation,
+ speed=1,
+ reverse=False,loop=True,
+ quality="medium",
+ height=165,
+ width=240,
+ key="write hear"
+ )
+ except Exception as err:
+ st.warning("something went wrong...",err,icon="⚠️")
+
+ ### enter paragraph length greater than 35 words
+ elif len(text_summarizer_input.split()) < 20:
+ st.warning("paragraph should be greater than 35 words",icon="✏️")
+
+ else:
+
+ def word_token_maxvalue(text:str)->int:
+ """
+ converting paragraph into
+ tokens
+ """
+ word_para = []
+ words = word_tokenize(text)
+ for i in words:
+ word_para.append(i)
+
+ return len(word_para)
+
+ @st.cache_data
+ def random_value_text(text:str)->int:
+ random_value = np.random.randint(
+ 10,word_token_maxvalue(text),6
+ )
+
+ return random.choice(random_value)
+
+ def clean_data_for_summarization(text:str)->str:
+ clean_text = clean(
+ text=text,fix_unicode=True,
+ to_ascii=True,no_emoji=True,
+ lang="en",no_line_breaks=False,
+ keep_two_line_breaks=True
+ )
+ return clean_text
+
+
+
+ text_Max_length = st.slider(
+ label="Max length",
+ min_value=10,
+ max_value=word_token_maxvalue(text_summarizer_input),
+ key="text summarizer max length",
+ step=1,value=random_value_text(text_summarizer_input)
+ )
+
+ Generate_text_summary = st.button(
+ label="Generate summary",key="text summary"
+ )
+
+ try:
+ #### writing loading
+ writing_loading_animation = insert_lottie_animation("lottie_animations/writing-loading.json")
+ summary_generating_animation = st_lottie_spinner(
+ animation_source=writing_loading_animation,
+ speed=2,
+ reverse=False,loop=True,
+ quality="medium",
+ height=165,
+ width=240,
+ key="writing generating"
+ )
+ except Exception as err:
+ st.warning("something went wrong...",err,icon="⚠️")
+
+
+ #### initilization of modal
+ if Generate_text_summary:
+
+ if __name__=="__main__":
+
+ ##### summary generation
+ with summary_generating_animation:
+
+ ### modal
+ Text_Summary_output = Hugingface_summarization_modal(
+ summary_text=clean_data_for_summarization(text_summarizer_input),
+ modal_name=Summarizer_modal,
+ maximum_length=text_Max_length
+ )
+
+ ##### summary displaying and copy
+ st.text("")
+ st.text("")
+ st.markdown("Generated Summary ",unsafe_allow_html=True)
+ st.text("")
+ st.write(Text_Summary_output)
+ st.text("")
+
+ copy_text(Text_Summary_output)
+ st.text("")
+ st.text("")
+
+ ###### original text desplay and copy
+ st.markdown("Original Text ",unsafe_allow_html=True)
+ st.text("")
+ original_text = clean_data_for_summarization(text_summarizer_input)
+ st.write(original_text)
+ st.text("")
+ copy_text(original_text)
+
+ st.text("")
+ st.text("")
+ st.text("")
+
+ ### summarization details
+ text_summarization_details = {
+ "Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
+ "Output":[
+ f"{Summarizer_modal}",
+ f"Length - {len(text_summarizer_input.split())}",
+ f"Length - {len(Text_Summary_output.split())}",
+ f"Tokens Used - {text_Max_length}"
+ ]
+ }
+
+ summarization_details_df = pd.DataFrame(
+ data=text_summarization_details,
+ index=["Hugingface Modal","No. words","No. Words","Max Length"]
+ )
+
+ st.text("")
+ st.text("")
+ st.text("")
+ st.dataframe(summarization_details_df,use_container_width=True)
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+
+
+##############################################################################################################
+
+############################## pdf summarizer
+
+
+#### pdf and text summarizer functions
+
+
+#### displaying uploaded pdf file
+def display_pdf_file(uploaded_file):
+ """
+ it is used to display the
+ file on screen
+ """
+ #### saving the uploaded file
+ def save_uploadfile(save_file):
+ with open(os.path.join("data",save_file.name),"wb") as f:
+ f.write(save_file.getbuffer())
+ return st.toast("file uploaded: {}".format(save_file.name))
+
+ try:
+ ### display pdf on screen
+ def displayPDF(pdf_file):
+ with open(pdf_file,"rb") as f:
+ base64_pdf = base64.b64encode(f.read()).decode("utf-8")
+
+ pdf_display = f"""
+
+ """
+
+ st.markdown(pdf_display,unsafe_allow_html=True)
+
+ ### save and display file
+ save_uploadfile(uploaded_file)
+ pdf_file = "data/"+uploaded_file.name
+ displayPDF(pdf_file)
+ except Exception as e:
+ st.warning("Something Went wrong...\n\n",e,icon="⚠️")
+
+
+#### Function to extract text from a specific page using pdfminer
+def extract_text_pdfminer(pdf_file, page_number):
+ """
+ this function extract pdf file
+ text by user input page number
+ """
+ try:
+ extracted_text = ''
+ for i, page_layout in enumerate(extract_pages(pdf_file)):
+ if i == page_number - 1:
+ ### Extract text elements and format them as closely as possible to the original layout
+ for element in page_layout:
+ if isinstance(element, LTTextContainer):
+ for text_line in element:
+ if isinstance(text_line, LTTextLine):
+ line = ''.join([char.get_text() for char in text_line if isinstance(char, LTChar)])
+ extracted_text += line.strip() + '\n'
+ return extracted_text
+ return st.warning("Invalid page number.",icon="⚠️")
+ except Exception as e:
+ st.warning("Something Went wrong...\n\n",e,icon="⚠️")
+
+
+###############################################
+
+
+##### clean text for summmarization task
+def uploaded_Clean_Text_Summarization(clean_text:str)->str:
+ """
+ it gives clean text for
+ summarization task
+ """
+ try:
+ pattern = r'[|`~^$<>]'
+ cleaned_paragraph = re.sub(pattern, '', clean_text)
+
+ ### using clean function
+ clean_output_para = clean(
+ text=cleaned_paragraph,fix_unicode=True,
+ to_ascii=True,no_emoji=True,
+ lang="en",no_line_breaks=False,
+ keep_two_line_breaks=True
+ )
+
+ except Exception as e:
+ st.warning("Something Went wrong...\n\n",e,icon="⚠️")
+
+ return clean_output_para
+
+
+### convert paragraph into tokens
+def generate_text_para_tokens(text_para:str)->int:
+ """
+ converting paragraph into
+ tokens
+ """
+ try:
+ pattern = r'[|`~#^$<>]'
+ cleaned_paragraph = re.sub(pattern, '', text_para)
+
+ #### using clean function
+ clean_para = clean(
+ text=cleaned_paragraph,fix_unicode=True,
+ to_ascii=True,no_emoji=True,
+ lang="en",no_line_breaks=False,
+ keep_two_line_breaks=True
+ )
+
+ word_tokens = []
+
+ for i in word_tokenize(clean_para):
+ word_tokens.append(i)
+ return len(np.array(word_tokens))
+
+ except Exception as e:
+ st.warning("Something Went wrong...\n\n",e,icon="⚠️")
+
+
+
+ ### generates random value for slider
+@st.cache_data
+def random_text_para_value(para:str)->int:
+ try:
+ random_value = np.random.randint(
+ 20, generate_text_para_tokens(para), 6
+ )
+ return random.choice(random_value)
+ except Exception as e:
+ st.warning("Something Went wrong...\n\n",e,icon="⚠️")
+
+
+#### PDF files summarizer
+def process_pdf(file):
+ reader = PdfReader(file)
+ page_count = len(reader.pages)
+
+ ### pdf display and information column
+ pdf_display_tab, pdf_summarizer_tab = st.tabs([f"Displaying {file.name}","Pdf Summarizer"])
+
+ ####### displaying pdf on pdf display tab
+ with pdf_display_tab:
+ st.markdown(f"Pdf - {file.name} ",unsafe_allow_html=True)
+
+ pdf_col, pdf_info_col = st.columns([5,3],gap="medium")
+ with pdf_col:
+ with st.spinner("Displaying file..."):
+ if __name__=="__main__":
+ display_pdf_file(file)
+
+ with pdf_info_col:
+ st.write("Your File: {}".format(file.name))
+ st.write(f"Number of pages: {str(page_count)}")
+ st.markdown(insert_html("htmlfiles/pdf-summarizer-info.html"),unsafe_allow_html=True)
+
+
+ ### pdf information and intract with pdf
+ with pdf_summarizer_tab:
+
+ st.text("")
+ st.markdown("Extract pdf text ",unsafe_allow_html=True)
+
+ ### toggle button for extracting text
+ extract_by_page_all = st.toggle(
+ label="Extract whole Text",key="toggle for extract text",
+ value=False
+ )
+
+ ### extracting all pdf text
+ if extract_by_page_all:
+ st.write("Extract whole pdf Text")
+
+ if st.button("Extract Whole Pdf",key="whole pdf text extract"):
+
+ st.text("")
+ st.text("")
+
+ with st.spinner("Extracting pdf..."):
+ whole_pdf_text = extract_text(file)
+ st.markdown("Whole PDF Text ",unsafe_allow_html=True)
+ st.text("")
+ st.write(whole_pdf_text)
+ else:
+ reader = PdfReader(file)
+ total_pages = len(reader.pages)
+ st.write("Extract by page Number")
+
+ pdf_page_no_col, pdf_page_noinfo_col = st.columns([3,5],gap="small")
+
+ with pdf_page_no_col:
+
+ ### input page number
+ Pdf_page_number_input = st.number_input(
+ label="Select the page number",
+ min_value=1, max_value=total_pages,
+ value=1,key="pdf page number",step=1
+ )
+
+ with pdf_page_noinfo_col:
+ st.text("")
+ st.text("")
+ st.write(f"Selected page: {str(Pdf_page_number_input)}")
+
+ Extract_page_no_button = st.button(
+ label="Extract Page text",
+ key="Extract button for page"
+ )
+ st.text("")
+ st.text("")
+
+ if Extract_page_no_button:
+ text_pdfminer = extract_text_pdfminer(file, Pdf_page_number_input)
+ st.session_state['extracted_text'] = text_pdfminer ### Store the extracted text in session state
+
+ if 'extracted_text' in st.session_state:
+ Pdf_file_text = st.text_area(
+ label=f"Text data of {Pdf_page_number_input} page",
+ value= st.session_state['extracted_text'],
+ height=400
+ )
+ st.session_state['extracted_text'] = Pdf_file_text # Update the text in session state based on user's input
+
+ #### pdf summarizer
+ st.text("")
+ Max_length_pdf_slider = st.slider(
+ label="Max Length",key="Pdf summarizer slider",
+ min_value=10,max_value=generate_text_para_tokens(Pdf_file_text),
+ value=random_text_para_value(Pdf_file_text)
+ )
+ st.text("")
+
+ upload_Pdf_summary_btn_col, upload_Pdf_print_btn_col, upload_clean_Pdf_print_btn_col, blank_Pdf_col1, blank_Pdf_col2 = st.columns(
+ [4,4,4,7,3],gap="small"
+ )
+
+ with blank_Pdf_col1:
+ pass
+ with blank_Pdf_col2:
+ pass
+
+ with upload_Pdf_summary_btn_col:
+ Generate_upload_pdf_summary_btn = st.button(
+ label="Generate Summary",
+ key="Generate summary of uploaded text pdf"
+ )
+
+ with upload_clean_Pdf_print_btn_col:
+ Upload_clean_pdf_btn = st.button(
+ label="Print Clean Text",
+ key="Print clean pdf file"
+ )
+
+
+ with upload_Pdf_print_btn_col:
+ upload_pdf_print_button = st.button(
+ label="Print Uploaded Text",
+ key="Print uploadded pdf"
+ )
+
+ ### clean text
+ if Upload_clean_pdf_btn:
+ with st.spinner("Generating Clean Text..."):
+ st.text("")
+ st.text("")
+ st.markdown("Clean Text ",unsafe_allow_html=True)
+ st.text("")
+ st.write(uploaded_Clean_Text_Summarization(Pdf_file_text))
+ st.text("")
+ copy_text(uploaded_Clean_Text_Summarization(Pdf_file_text))
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+ ### uploaded text
+ elif upload_pdf_print_button:
+ with st.spinner("Generating Uploaded Text..."):
+ st.text("")
+ st.text("")
+ st.markdown("Uploaded Text ",unsafe_allow_html=True)
+ st.text("")
+ st.text(Pdf_file_text)
+ st.text("")
+ copy_text(Pdf_file_text)
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+ ### generating summary
+ elif Generate_upload_pdf_summary_btn:
+ st.text("")
+ with st.spinner("Generating Summary..."):
+ st.text("")
+ if __name__=="__main__":
+ Uploded_Pdf_file_Summary = Hugingface_summarization_modal(
+ summary_text=uploaded_Clean_Text_Summarization(Pdf_file_text),
+ maximum_length=Max_length_pdf_slider,
+ modal_name="facebook-bart"
+ )
+ st.markdown("Summary ",unsafe_allow_html=True)
+ st.text("")
+
+ st.write(Uploded_Pdf_file_Summary)
+ st.text("")
+ copy_text(Uploded_Pdf_file_Summary)
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+
+
+
+#################################################
+
+
+##### text file summarizer
+def process_text(file):
+ text_file = file.read().decode("utf-8")
+ st.text("")
+ st.markdown("Text file ",unsafe_allow_html=True)
+
+
+ ### displaying text you can edit also
+ Uploaded_text = st.text_area(
+ label=f"{file.name[:-4]} text data",
+ value=text_file,key="text file data",
+ height=400
+ )
+ st.write(f"**{file.name[:-4]}** Edit your file press ctrl+enter")
+
+ ###3 if length is less than 20
+ if len(Uploaded_text.split()) < 20:
+ st.warning("Summarization Task failed\nnot enough amount of text...",icon="⚠️")
+
+ else:
+ st.text("")
+ #### max length slider
+ max_text_para_length = st.slider(
+ label="Max Length",min_value=10,
+ max_value=generate_text_para_tokens(Uploaded_text),
+ step=1,key="paragraph length",
+ value=random_text_para_value(Uploaded_text)
+ )
+ st.text("")
+
+ upload_text_summary_btn_col, upload_text_print_btn_col, upload_clean_text_print_btn_col, blank_text_col1, blank_text_col2 = st.columns(
+ [4,4,4,7,3],gap="small"
+ )
+
+ with blank_text_col1:
+ pass
+ with blank_text_col2:
+ pass
+
+ with upload_text_summary_btn_col:
+ Generate_upload_text_summary_btn = st.button(
+ label="Generate Summary",
+ key="Generate summary of uploaded text"
+ )
+
+ with upload_clean_text_print_btn_col:
+ Upload_clean_text_btn = st.button(
+ label="Print Clean Text",
+ key="Print clean text file"
+ )
+
+
+ with upload_text_print_btn_col:
+ upload_text_print_button = st.button(
+ label="Print Uploaded Text",
+ key="Print uploadded text"
+ )
+
+ ### clean text
+ if Upload_clean_text_btn:
+ with st.spinner("Generating Clean Text..."):
+ st.text("")
+ st.text("")
+ st.markdown("Clean Text ",unsafe_allow_html=True)
+ st.text("")
+ st.write(uploaded_Clean_Text_Summarization(Uploaded_text))
+ st.text("")
+ copy_text(uploaded_Clean_Text_Summarization(Uploaded_text))
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",
+ unsafe_allow_html=True)
+
+ ### uploaded text
+ elif upload_text_print_button:
+ with st.spinner("Generating Uploaded Text..."):
+ st.text("")
+ st.text("")
+ st.markdown("Uploaded Text ",unsafe_allow_html=True)
+ st.text("")
+ st.text(Uploaded_text)
+ st.text("")
+ copy_text(Uploaded_text)
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",
+ unsafe_allow_html=True)
+
+
+ ### generating summary
+ elif Generate_upload_text_summary_btn:
+ st.text("")
+ with st.spinner("Generating Summary..."):
+ st.text("")
+ if __name__=="__main__":
+ Uploded_Text_file_Summary = Hugingface_summarization_modal(
+ summary_text=uploaded_Clean_Text_Summarization(Uploaded_text),
+ maximum_length=max_text_para_length,
+ modal_name="facebook-bart"
+ )
+ st.markdown("Summary ",unsafe_allow_html=True)
+ st.text("")
+
+ st.write(Uploded_Text_file_Summary)
+ st.text("")
+ copy_text(Uploded_Text_file_Summary)
+ st.text("")
+ st.text("")
+ st.text("")
+ st.markdown("Created by Nishant Maity ",unsafe_allow_html=True)
+
+
+
+if Main_menu == "PDF Summarizer":
+
+ ### blank and app columns
+ Blank_pdf1 ,pdf_summarizer_col, Blank_pdf2 = st.columns([1,8,1],gap="small")
+
+ with Blank_pdf1:
+ pass
+ with Blank_pdf2:
+ pass
+
+ with pdf_summarizer_col:
+ st.text("")
+ st.header("PDF Summarizer") ### app heading
+
+ ### File uploader function
+ app_file_upload = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])
+
+ if app_file_upload is not None:
+
+ ### if pdf file
+ if app_file_upload.type == "application/pdf":
+ if __name__=="__main__":
+ process_pdf(app_file_upload)
+
+ #### if text file
+ elif app_file_upload.type == "text/plain":
+ if __name__=="__main__":
+ process_text(app_file_upload)
+
+ else:
+ st.info("Upload your pdf, text file")
+
+
+ #### app info
+if Main_menu == "App Info":
+ Blank_app_info1, App_info_col, Blank_app_info2 = st.columns([2,8,2])
+
+ #### blank columns
+ with Blank_app_info1:
+ pass
+ with Blank_app_info2:
+ pass
+
+ ### app info column
+ with App_info_col:
+ st.text("")
+ st.header("App Info")
+ st.text("")
+
+ if __name__=="__main__":
+ st.markdown(insert_html("htmlfiles/app-info.html"),
+ unsafe_allow_html=True
+ )
+