""",unsafe_allow_html=True
)
# NOTE(review): indentation appears to have been stripped from this file — the
# original Streamlit nesting (`if`/`with` bodies) must be restored before this
# script can run. Code below is left byte-identical; only comments are added.
# Entry point: load the modal CSS, then build the sidebar and navigation menu.
if __name__=="__main__":
insert_css("cssfiles/modal.css")
#### creating sidebar
app_sidebar = st.sidebar
with app_sidebar:
st.text("")
st.subheader("GenAi Summarizer🤖")
st.write("Developer: **Nishant Maity**")
st.text("")
st.text("")
### creating menu bar
# option_menu (streamlit-option-menu) renders the navigation; the selected
# label drives which page section renders further down the script.
Main_menu = option_menu(
menu_title="",
options=["Article Summarizer","Text Summarizer","PDF Summarizer","App Info"],
icons=["chat-dots","card-heading","file-earmark-pdf","person-circle"],
default_index=0,
key="Menu Bar"
)
st.text("")
### select modal for text and article summarizer
# Hugging Face model picker shared by the Article and Text summarizer pages.
# Hugingface_modals is presumably a dict of model-name -> model-id defined
# earlier in the file — TODO confirm.
if Main_menu == "Article Summarizer" or Main_menu == "Text Summarizer":
Summarizer_modal = st.selectbox(
label="Select Modal",
options=np.array(list(Hugingface_modals.keys())),
index=1,
key="Modals"
)
#### selecting number or paragraph for article summarizer
# slider controls how many scraped paragraphs are kept (1-10)
if Main_menu == "Article Summarizer":
with app_sidebar:
st.text("")
st.text("")
Number_of_article_paragraph = st.slider(
label="Number of paragraph",
min_value=1,max_value=10,
step=1,value=2,
key="Number of paragraph"
)
with app_sidebar:
st.button(
label="Watch App Tutorial",
use_container_width=True,
on_click=watch_tutorial
)
##### article summarizer functions
##### naive bayes text classification function
# Compiled once at import time so repeated classification calls don't pay
# for pattern setup on every call.  Matches http(s)://… or www.… prefixes.
_URL_PATTERN = re.compile(r'(?:https?://|www\.)\S+')

def is_url(text):
    """Return True if *text* begins with a URL (``http(s)://…`` or ``www.…``).

    Anchored at the start of the string (``re.match``), matching the
    original behaviour: a URL embedded mid-sentence is not detected here —
    that case is handled by the Naive-Bayes classifier instead.
    """
    return bool(_URL_PATTERN.match(text))
# Train a model for text vs URL classification
def train_model():
    """Train and return a tiny Naive-Bayes text-vs-URL classifier.

    Fits a CountVectorizer + MultinomialNB pipeline on a small in-code
    dataset.  Labels: 1 = URL, 0 = plain text.

    Returns:
        The fitted sklearn pipeline, or ``None`` (after showing a
        Streamlit error) if training fails.
    """
    #### dataset (normal text and URLs)
    try:
        data = [
            ('This is a normal sentence.', 'text'),
            ('www.google.com', 'url'),
            ('Check out this website', 'text'),
            ('https://www.example.com', 'url'),
            ('Machine learning is fun', 'text'),
            ('http://openai.com', 'url'),
            ('Python is a great language', 'text'),
        ]
        texts = [sample for sample, _ in data]
        labels = [1 if label == 'url' else 0 for _, label in data]  ## 1 for url, 0 for text
        ##### modal training
        X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
        model = make_pipeline(CountVectorizer(), MultinomialNB())
        model.fit(X_train, y_train)  #### Train the model
        # (previous code computed train/test accuracy and discarded both
        # results; the no-op score calls have been removed)
        return model
    except Exception as e:
        # st.error takes a single message body; the old call passed the
        # exception as an extra positional argument, which is itself a
        # TypeError — interpolate it into the message instead.
        st.error(f"Error...\n\n{e}", icon="⚠️")
############################### article summarizer
############################### article summarizer
# Page layout: a wide centre column flanked by two empty spacer columns,
# then the page header and the search/URL input box.
if Main_menu == "Article Summarizer":
blank_article1, article_column, blank_article2 = st.columns([2,8,2],gap="small")
with blank_article1: ### blank space
pass
with blank_article2: ### blank space
pass
#### main app column
with article_column:
#### app title
st.text("")
App_Title = colored_header(
label="Web Article Summarizer 📑",
color_name="blue-green-70",
description="Search or paste url"
)
# free-text input: either a topic keyword (-> Wikipedia scrape) or a URL
Text_input = st.text_input(
label="Search or paste url",
placeholder="machine learning, java url- https://www.example.com"
)
### max slider value
def max_length_slider_value(max_length)->int:
    """Map the sidebar paragraph count (1-10) to the max-length slider ceiling.

    Args:
        max_length: number of paragraphs selected in the sidebar slider.

    Returns:
        The slider's ``max_value`` for that paragraph count, or ``None``
        for values outside 1-10 (matching the fall-through of the
        original if/elif chain).
    """
    # table lookup instead of a ten-branch elif chain
    ceiling_by_paragraphs = {
        1: 90, 2: 150, 3: 250, 4: 380, 5: 470,
        6: 600, 7: 750, 8: 900, 9: 1200, 10: 1360,
    }
    return ceiling_by_paragraphs.get(max_length)
@st.cache_data
def Default_max_length(default_value):
    """Pick a random default for the max-length slider, scaled to the
    number of paragraphs selected (1-10).

    Draws six candidates uniformly from a per-level range and returns one
    of them.  ``st.cache_data`` keeps the chosen default stable for a
    given input across reruns.

    Returns:
        A random int from the level's range, or ``None`` for values
        outside 1-10 (matching the original fall-through).
    """
    # (low, high) sampling range per paragraph-count level — same bounds
    # as the original ten-branch elif chain.
    ranges = {
        1: (30, 65), 2: (50, 130), 3: (70, 210), 4: (140, 310),
        5: (200, 390), 6: (230, 490), 7: (280, 590), 8: (350, 750),
        9: (450, 1050), 10: (560, 1100),
    }
    bounds = ranges.get(default_value)
    if bounds is None:
        return None
    low, high = bounds
    random_value = np.random.randint(low, high, 6)
    return random.choice(random_value)
# Control row: generate button | summarize-mode toggle | model level badge.
Button_column, Toggle_summary_btn, Modal_display = st.columns([1,1,3],gap="small")
# article_summarizer(max_length)
with Button_column:
### generate article button
Generate_btn = st.button(label="Generate Article")
with Toggle_summary_btn:
### if on then it generates summary
# toggle off = scrape only; on = scrape + run the HF summarizer
summary_on = st.toggle(
label="Summarizer",
value=False,
key="Summarizer on off"
)
if summary_on:
st.toast(body="Summarizer Mode on",icon="📑")
else:
st.toast(body="Scraping Mode",icon="📰")
with Modal_display:
if summary_on:
Modal_Level(Summarizer_modal)
else:
pass
# max-token slider only shown in summarizer mode; its ceiling and default
# both scale with the paragraph-count sidebar slider.
if summary_on:
max_length_article = st.slider(
label="max length",
min_value=10,max_value=max_length_slider_value(Number_of_article_paragraph),
key="max length",value=Default_max_length(Number_of_article_paragraph)
)
################################################################################################
### article scraper function
def article_scraper(article_url):
"""Scrape a web article with newspaper3k, render it on the page and
(optionally) summarize it.

Downloads and parses *article_url*, shows title / publish date /
authors, keeps the top N paragraphs (sidebar slider), cleans the
text, offers copy/download buttons, and — when the Summarizer toggle
is on — runs the selected Hugging Face model and shows summary stats.
On any failure it renders 404 Lottie animations plus a warning.

NOTE(review): several ``st.markdown("`` calls below lost their HTML
payload (the string literal was truncated) — the original markup must
be restored before this runs.
"""
try:
article = Article(article_url) ### article object
article.download()
article.parse()
nltk.download("punkt")
article.nlp()
st.markdown("
Article
",unsafe_allow_html=True)
st.text("")
st.text("")
st.markdown( ### article title
f"""
{article.title}
""",unsafe_allow_html=True
)
article_publishdate = article.publish_date ### article publish date
# newspaper3k returns None when no publish date could be parsed
if article_publishdate == None:
pass
else:
st.text("published on - "+str(article_publishdate))
article_authors = article.authors #### article authors
if len(article_authors) == 0:
pass
else:
autho_name_print = ", ".join(map(str, article_authors))
st.write(autho_name_print)
### generating article summary
def get_top_paragraphs(text, num_paragraphs=Number_of_article_paragraph):
"""Return the first *num_paragraphs* paragraphs of *text* that are
longer than 12 words, joined by blank lines."""
paragraphs = text.split('\n\n')
valid_paragraphs = [p.strip() for p in paragraphs if len(p.strip().split()) > 12]
top_paragraphs = valid_paragraphs[:num_paragraphs]
return '\n\n'.join(top_paragraphs)
article_summary = article.text
def remove_bracketed_numbers(text)->str:
# strip Wikipedia-style citation markers such as "[12]"
pattern = r'\[\d+\]'
cleaned_text = re.sub(pattern, '', text)
return cleaned_text
cleaned_article_text = remove_bracketed_numbers(get_top_paragraphs(article_summary))
# stash the cleaned text in session state for reruns
if "clean_text" not in st.session_state:
st.session_state.clean_text = ""
st.session_state.clean_text = cleaned_article_text
def clean_output_text(text:str)->str:
"""Normalise *text*: fix unicode, fold to ASCII, drop emojis,
keep (at most double) line breaks."""
clean_text = clean(
text=text,fix_unicode=True,
to_ascii=True,no_emoji=True,
lang="en",no_line_breaks=False,
keep_two_line_breaks=True
)
return clean_text
### Print the cleaned text
st.write(clean_output_text(st.session_state.clean_text))
st.text("")
st.text("")
### copy download button
Article_filename = f"{article.title}.doc"
Article_text_format = f"""
\n\n\n
{str(article.title)}
published on - {str(article_publishdate)}
Authors - {", ".join(map(str, article_authors))}
\n\n\n
{str(cleaned_article_text)}
"""
if __name__=="__main__":
Copy_download_button(
article_text=clean_output_text(cleaned_article_text),
article_format=Article_text_format,
article_file_name=Article_filename
)
st.text("")
# summarizer-mode branch: run the selected HF model on the cleaned text
if summary_on:
st.markdown("
Article Summary
",unsafe_allow_html=True)
#### summarization modal
with st.spinner("Generating Summary..."):
if __name__=="__main__":
summarized_article_text = Hugingface_summarization_modal(
summary_text=clean_output_text(cleaned_article_text),
modal_name=Summarizer_modal,
maximum_length=max_length_article
)
#### clean ai generated paragraph
st.write(summarized_article_text)
st.text("")
st.text("")
summary_format = f"""
\n\n
{article.title}
\n\n\n
{summarized_article_text}
"""
#### copy or download summary button
if __name__=="__main__":
Copy_download_button(
article_text=summarized_article_text,
article_file_name=f"{article.title}-summary.doc",
article_format=summary_format
)
# stats table: model used, input/output word counts, token budget
if summary_on:
### summarization details
summarization_details = {
"Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
"Output":[
f"{Summarizer_modal}",
f"Length - {len(cleaned_article_text.split())}",
f"Length - {len(summarized_article_text.split())}",
f"Tokens Used - {max_length_article}"
]
}
summarization_details_df = pd.DataFrame(
data=summarization_details,
index=["Hugingface Modal","No. words","No. Words","Max Length"]
)
st.text("")
st.text("")
st.text("")
st.dataframe(summarization_details_df,use_container_width=True)
except Exception as err:
### 404 error animation
Error_404_col, page_not_found_col = st.columns(2)
with Error_404_col:
try:
Error_404 = insert_lottie_animation("lottie_animations/error-404.json")
st_lottie(
animation_source=Error_404,
speed=1,
reverse=False,loop=True,
quality="high",
height=315,
width=400,
key="404 error"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
with page_not_found_col:
try:
page_not_found = insert_lottie_animation("lottie_animations/page-not-found.json")
st_lottie(
animation_source=page_not_found,
speed=1,
reverse=False,loop=True,
quality="high",
height=265,
width=400,
key="page not found"
)
except Exception as err:
# NOTE(review): same st.warning positional-argument bug as above.
st.warning("something went wrong...",err,icon="⚠️")
st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
def article_summarizer(summary_length):
"""Debug stub: echo the requested summary length to the page."""
st.write(summary_length)
def check_url_exists(url):
    """Return True if *url* answers a HEAD request with a non-error status.

    Follows redirects.  Any network failure (connection error, timeout,
    invalid URL) is treated as "does not exist".
    """
    try:
        # timeout added so an unresponsive host can't hang the Streamlit
        # script run indefinitely
        response = requests.head(url, allow_redirects=True, timeout=10)
        return response.status_code < 400
    except requests.exceptions.RequestException:
        # Handle any exception (e.g., connection error, timeout)
        return False
########### link classified article
def link_classified(text):
"""Handle input classified as a URL: scrape and render the article at
*text*, then show a "Visit Article" link when the URL is reachable.

NOTE(review): the ``st.markdown("`` call near the end lost its HTML
payload (truncated string literal) — restore the markup before running.
"""
try:
url_text = text
article_url_link = f"{url_text}" ### url to scrap
if __name__=="__main__":
article_scraper(article_url_link)
st.text("")
st.text("")
# only offer the outbound link if a HEAD request succeeds
if check_url_exists(article_url_link):
st.link_button(label="Visit Article",url=(article_url_link))
else:
st.warning("Url does not exist...",icon="⚠️")
st.text("")
st.text("")
st.text("")
st.markdown("
Created by Nishant Maity
",unsafe_allow_html=True)
except Exception as err:
st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
####$ text classified article
def text_classified(text):
"""Handle input classified as plain text: turn it into a Wikipedia URL
(spaces -> underscores) and scrape/render that article.

NOTE(review): the ``st.markdown("`` call near the end lost its HTML
payload (truncated string literal) — restore the markup before running.
"""
try:
url_text = text.replace(" ","_")
article_url = f"https://en.wikipedia.org/wiki/{url_text}" ### url to scrap
if __name__=="__main__":
article_scraper(article_url)
st.text("")
st.text("")
if check_url_exists(article_url):
st.link_button(label="Visit Article",url=article_url)
else:
st.warning("Url does not exist...",icon="⚠️")
st.text("")
st.text("")
st.text("")
st.markdown("
Created by Nishant Maity
",unsafe_allow_html=True)
except Exception as e:
# NOTE(review): st.warning takes one body string — passing `e` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("Something went wrong...",e,icon="⚠️")
############################################################################################
### j query animation
# Idle state: show the particles animation until the user submits input.
if not Generate_btn or Text_input.strip() == "":
try:
def particle(Js_file):
# embed the particles HTML/JS file as a Streamlit component
with open(Js_file) as f:
component.html(f"{f.read()}", height=420)
if __name__=="__main__":
particle("animation/particles.html")
except Exception as e:
# NOTE(review): st.error takes one body string — passing `e` as a second
# positional argument is a TypeError; should be an f-string.
st.error("Something went wrong...\n\n",e)
# Submit path: classify the input as URL vs topic text and dispatch.
if Generate_btn:
if Text_input.strip() != "":
st.text("")
st.text("")
### Function to classify the input text
def classify_input(text, model):
try:
# fast path: regex says it's a URL
if is_url(text):
link_classified(text)
else:
#### If it's not detected as a URL
# fall back to the Naive-Bayes classifier
prediction = model.predict([text])[0]
if prediction == 1:
link_classified(Text_input)
else:
text_classified(Text_input)
except Exception as e:
# NOTE(review): same st.error positional-argument bug as above.
st.error("Error...\n\n",e,icon="⚠️")
with st.spinner("Generating Article..."):
if __name__=="__main__":
model = train_model()
classify_input(Text_input, model)
################################# Text summarizer
################################# Text summarizer
# Page layout: wide centre column with header, text area input, model badge,
# and an idle "write here" animation until text is entered.
if Main_menu == "Text Summarizer":
blank_text_sum1, text_summarizer_col, blank_text_sum2 = st.columns([2,8,2],gap="small")
### blank columns
with blank_text_sum1:
pass
with blank_text_sum2:
pass
### text summarizer app column
with text_summarizer_col:
#### app title
st.text("")
text_summarizer_Title = colored_header(
label="Text Summarizer 📄",
color_name="violet-70",
description="enter or paste text hear"
)
placeholder_text = """write or paste your text hear
paragraph length should be greater then 30 words
to generate output tap on screen or press ctrl+enter
"""
### input box
text_summarizer_input = st.text_area(
label="Enter Text Hear",
placeholder=placeholder_text,
height=340,
key="text summarizer"
)
Modal_Level(Summarizer_modal)
# empty input -> idle animation
if text_summarizer_input.strip() == "":
try:
#### writing animation
write_hear_animation = insert_lottie_animation("lottie_animations/write-hear.json")
st_lottie(
animation_source=write_hear_animation,
speed=1,
reverse=False,loop=True,
quality="medium",
height=165,
width=240,
key="write hear"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
### enter paragraph length greater than 35 words
# NOTE(review): the check uses 20 words but the message says 35 — one of
# the two should be corrected.
elif len(text_summarizer_input.split()) < 20:
st.warning("paragraph should be greater than 35 words",icon="✏️")
else:
def word_token_maxvalue(text:str)->int:
    """Return the number of word tokens in *text* (nltk tokenisation).

    Used as the ceiling of the text-summarizer max-length slider.
    """
    # word_tokenize already returns a list; the previous element-by-element
    # copy into a second list added nothing
    return len(word_tokenize(text))
@st.cache_data
def random_value_text(text:str)->int:
    """Pick a random default (between 10 and the token count of *text*)
    for the max-length slider; cached so it is stable per input."""
    candidates = np.random.randint(10, word_token_maxvalue(text), 6)
    return random.choice(candidates)
def clean_data_for_summarization(text:str)->str:
    """Normalise *text* for the summarizer: fix unicode, fold to ASCII,
    strip emojis, keep (at most double) line breaks."""
    return clean(
        text=text,
        fix_unicode=True,
        to_ascii=True,
        no_emoji=True,
        lang="en",
        no_line_breaks=False,
        keep_two_line_breaks=True,
    )
# Valid input path: max-length slider (bounded by the input's token count),
# generate button, spinner animation, then model call + output rendering.
text_Max_length = st.slider(
label="Max length",
min_value=10,
max_value=word_token_maxvalue(text_summarizer_input),
key="text summarizer max length",
step=1,value=random_value_text(text_summarizer_input)
)
Generate_text_summary = st.button(
label="Generate summary",key="text summary"
)
try:
#### writing loading
writing_loading_animation = insert_lottie_animation("lottie_animations/writing-loading.json")
summary_generating_animation = st_lottie_spinner(
animation_source=writing_loading_animation,
speed=2,
reverse=False,loop=True,
quality="medium",
height=165,
width=240,
key="writing generating"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
#### initilization of modal
if Generate_text_summary:
if __name__=="__main__":
##### summary generation
with summary_generating_animation:
### modal
Text_Summary_output = Hugingface_summarization_modal(
summary_text=clean_data_for_summarization(text_summarizer_input),
modal_name=Summarizer_modal,
maximum_length=text_Max_length
)
##### summary displaying and copy
st.text("")
st.text("")
# NOTE(review): this st.markdown lost its HTML payload (truncated string
# literal) — restore the heading markup before running.
st.markdown("
Generated Summary
",unsafe_allow_html=True)
st.text("")
st.write(Text_Summary_output)
st.text("")
copy_text(Text_Summary_output)
st.text("")
st.text("")
###### original text desplay and copy
# NOTE(review): truncated st.markdown payload here as well.
st.markdown("
",unsafe_allow_html=True)
##############################################################################################################
############################## pdf summarizer
#### pdf and text summarizer functions
#### displaying uploaded pdf file
def display_pdf_file(uploaded_file):
"""Persist the uploaded PDF under ``data/`` and embed it on the page.

NOTE(review): ``pdf_display`` below is an empty f-string — the original
iframe/embed HTML (built from ``base64_pdf``) was lost and must be
restored, otherwise nothing is rendered.
"""
#### saving the uploaded file
def save_uploadfile(save_file):
# write the raw upload buffer to data/<name>
with open(os.path.join("data",save_file.name),"wb") as f:
f.write(save_file.getbuffer())
return st.toast("file uploaded: {}".format(save_file.name))
try:
### display pdf on screen
def displayPDF(pdf_file):
# base64-encode the saved file for inline embedding
with open(pdf_file,"rb") as f:
base64_pdf = base64.b64encode(f.read()).decode("utf-8")
pdf_display = f"""
"""
st.markdown(pdf_display,unsafe_allow_html=True)
### save and display file
save_uploadfile(uploaded_file)
pdf_file = "data/"+uploaded_file.name
displayPDF(pdf_file)
except Exception as e:
# NOTE(review): st.warning takes one body string — passing `e` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("Something Went wrong...\n\n",e,icon="⚠️")
#### Function to extract text from a specific page using pdfminer
def extract_text_pdfminer(pdf_file, page_number):
    """Extract the text of one page (1-based *page_number*) of *pdf_file*.

    Rebuilds each text line character-by-character from pdfminer layout
    objects so the output stays close to the page's visual layout.

    Returns:
        The extracted page text (one stripped line per ``\\n``), or the
        result of a Streamlit warning when *page_number* is out of range
        or extraction fails.
    """
    try:
        for i, page_layout in enumerate(extract_pages(pdf_file)):
            if i != page_number - 1:
                continue
            ### Extract text elements and format them as closely as possible to the original layout
            lines = []
            for element in page_layout:
                if isinstance(element, LTTextContainer):
                    for text_line in element:
                        if isinstance(text_line, LTTextLine):
                            line = ''.join(char.get_text() for char in text_line if isinstance(char, LTChar))
                            lines.append(line.strip())
            # join instead of quadratic += string building
            return ''.join(l + '\n' for l in lines)
        # loop exhausted: the requested page does not exist
        return st.warning("Invalid page number.",icon="⚠️")
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
###############################################
##### clean text for summmarization task
def uploaded_Clean_Text_Summarization(clean_text:str)->str:
    """Return *clean_text* normalised for the summarization task.

    Strips the characters ``| ` ~ ^ $ < >`` and then runs cleantext's
    ``clean`` (unicode fix, ASCII fold, emoji removal, line breaks kept).

    Returns an empty string (after a Streamlit warning) on failure.
    """
    try:
        pattern = r'[|`~^$<>]'
        cleaned_paragraph = re.sub(pattern, '', clean_text)
        ### using clean function
        # return inside the try: the old code returned `clean_output_para`
        # *after* the except block, which raised NameError whenever clean()
        # failed (the variable was never bound)
        return clean(
            text=cleaned_paragraph,fix_unicode=True,
            to_ascii=True,no_emoji=True,
            lang="en",no_line_breaks=False,
            keep_two_line_breaks=True
        )
    except Exception as e:
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return ""
### convert paragraph into tokens
def generate_text_para_tokens(text_para:str)->int:
    """Count word tokens in *text_para* after cleaning.

    Strips ``| ` ~ # ^ $ < >``, normalises with cleantext's ``clean``,
    then tokenises with nltk.  Used as the ceiling of the PDF-summarizer
    max-length slider.

    Returns ``None`` (after a Streamlit warning) on failure.
    """
    try:
        pattern = r'[|`~#^$<>]'
        cleaned_paragraph = re.sub(pattern, '', text_para)
        #### using clean function
        clean_para = clean(
            text=cleaned_paragraph,fix_unicode=True,
            to_ascii=True,no_emoji=True,
            lang="en",no_line_breaks=False,
            keep_two_line_breaks=True
        )
        # word_tokenize already yields the token list; the old copy loop
        # plus np.array wrapper before len() added nothing
        return len(word_tokenize(clean_para))
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
### generates random value for slider
@st.cache_data
def random_text_para_value(para:str)->int:
    """Pick a random default (between 20 and the token count of *para*)
    for the PDF-summarizer max-length slider; cached per input.

    Returns ``None`` (after a Streamlit warning) on failure.
    """
    try:
        random_value = np.random.randint(
            20, generate_text_para_tokens(para), 6
        )
        return random.choice(random_value)
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
#### PDF files summarizer
def process_pdf(file):
"""Render the uploaded PDF in two tabs: a viewer tab and a summarizer
tab with whole-document or per-page text extraction plus clean-up and
summary controls.

NOTE(review): several ``st.markdown`` calls below lost their HTML
payload (truncated string literals) — restore the markup before running.
"""
reader = PdfReader(file)
page_count = len(reader.pages)
### pdf display and information column
pdf_display_tab, pdf_summarizer_tab = st.tabs([f"Displaying {file.name}","Pdf Summarizer"])
####### displaying pdf on pdf display tab
with pdf_display_tab:
st.markdown(f"
Pdf - {file.name}
",unsafe_allow_html=True)
pdf_col, pdf_info_col = st.columns([5,3],gap="medium")
with pdf_col:
with st.spinner("Displaying file..."):
if __name__=="__main__":
display_pdf_file(file)
with pdf_info_col:
st.write("Your File: {}".format(file.name))
st.write(f"Number of pages: {str(page_count)}")
st.markdown(insert_html("htmlfiles/pdf-summarizer-info.html"),unsafe_allow_html=True)
### pdf information and intract with pdf
with pdf_summarizer_tab:
st.text("")
st.markdown("
Extract pdf text
",unsafe_allow_html=True)
### toggle button for extracting text
# toggle: whole-document extraction vs per-page extraction
extract_by_page_all = st.toggle(
label="Extract whole Text",key="toggle for extract text",
value=False
)
### extracting all pdf text
if extract_by_page_all:
st.write("Extract whole pdf Text")
if st.button("Extract Whole Pdf",key="whole pdf text extract"):
st.text("")
st.text("")
with st.spinner("Extracting pdf..."):
whole_pdf_text = extract_text(file)
st.markdown("
Whole PDF Text
",unsafe_allow_html=True)
st.text("")
st.write(whole_pdf_text)
else:
# per-page branch: pick a page, extract it, keep the (editable) text
# in session state so it survives Streamlit reruns
reader = PdfReader(file)
total_pages = len(reader.pages)
st.write("Extract by page Number")
pdf_page_no_col, pdf_page_noinfo_col = st.columns([3,5],gap="small")
with pdf_page_no_col:
### input page number
Pdf_page_number_input = st.number_input(
label="Select the page number",
min_value=1, max_value=total_pages,
value=1,key="pdf page number",step=1
)
with pdf_page_noinfo_col:
st.text("")
st.text("")
st.write(f"Selected page: {str(Pdf_page_number_input)}")
Extract_page_no_button = st.button(
label="Extract Page text",
key="Extract button for page"
)
st.text("")
st.text("")
if Extract_page_no_button:
text_pdfminer = extract_text_pdfminer(file, Pdf_page_number_input)
st.session_state['extracted_text'] = text_pdfminer ### Store the extracted text in session state
if 'extracted_text' in st.session_state:
Pdf_file_text = st.text_area(
label=f"Text data of {Pdf_page_number_input} page",
value= st.session_state['extracted_text'],
height=400
)
st.session_state['extracted_text'] = Pdf_file_text # Update the text in session state based on user's input
#### pdf summarizer
st.text("")
Max_length_pdf_slider = st.slider(
label="Max Length",key="Pdf summarizer slider",
min_value=10,max_value=generate_text_para_tokens(Pdf_file_text),
value=random_text_para_value(Pdf_file_text)
)
st.text("")
upload_Pdf_summary_btn_col, upload_Pdf_print_btn_col, upload_clean_Pdf_print_btn_col, blank_Pdf_col1, blank_Pdf_col2 = st.columns(
[4,4,4,7,3],gap="small"
)
with blank_Pdf_col1:
pass
with blank_Pdf_col2:
pass
with upload_Pdf_summary_btn_col:
Generate_upload_pdf_summary_btn = st.button(
label="Generate Summary",
key="Generate summary of uploaded text pdf"
)
with upload_clean_Pdf_print_btn_col:
Upload_clean_pdf_btn = st.button(
label="Print Clean Text",
key="Print clean pdf file"
)
with upload_Pdf_print_btn_col:
upload_pdf_print_button = st.button(
label="Print Uploaded Text",
key="Print uploadded pdf"
)
### clean text
if Upload_clean_pdf_btn:
with st.spinner("Generating Clean Text..."):
st.text("")
st.text("")
st.markdown("
",unsafe_allow_html=True)
# PDF Summarizer page: upload a PDF or text file and dispatch on MIME type.
if Main_menu == "PDF Summarizer":
### blank and app columns
Blank_pdf1 ,pdf_summarizer_col, Blank_pdf2 = st.columns([1,8,1],gap="small")
with Blank_pdf1:
pass
with Blank_pdf2:
pass
with pdf_summarizer_col:
st.text("")
st.header("PDF Summarizer") ### app heading
### File uploader function
app_file_upload = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])
if app_file_upload is not None:
### if pdf file
if app_file_upload.type == "application/pdf":
if __name__=="__main__":
process_pdf(app_file_upload)
#### if text file
elif app_file_upload.type == "text/plain":
if __name__=="__main__":
# process_text is presumably defined elsewhere in the file — TODO confirm
process_text(app_file_upload)
else:
st.info("Upload your pdf, text file")
#### app info
# App Info page: static HTML rendered from a template file.
if Main_menu == "App Info":
Blank_app_info1, App_info_col, Blank_app_info2 = st.columns([2,8,2])
#### blank columns
with Blank_app_info1:
pass
with Blank_app_info2:
pass
### app info column
with App_info_col:
st.text("")
st.header("App Info")
st.text("")
if __name__=="__main__":
st.markdown(insert_html("htmlfiles/app-info.html"),
unsafe_allow_html=True
)