""",unsafe_allow_html=True
)
# NOTE(review): indentation appears to have been stripped from this file — the
# original Streamlit nesting (`if`/`with` bodies) must be restored before this
# script can run. Code below is left byte-identical; only comments are added.
# Entry point: load the modal CSS, then build the sidebar and navigation menu.
if __name__=="__main__":
insert_css("cssfiles/modal.css")
#### creating sidebar
app_sidebar = st.sidebar
with app_sidebar:
st.text("")
st.subheader("GenAi Summarizer🤖")
st.write("Developer: **Nishant Maity**")
st.text("")
st.text("")
### creating menu bar
# option_menu (streamlit-option-menu) renders the navigation; the selected
# label drives which page section renders further down the script.
Main_menu = option_menu(
menu_title="",
options=["Article Summarizer","Text Summarizer","PDF Summarizer","App Info"],
icons=["chat-dots","card-heading","file-earmark-pdf","person-circle"],
default_index=0,
key="Menu Bar"
)
st.text("")
### select modal for text and article summarizer
# Hugging Face model picker shared by the Article and Text summarizer pages.
# Hugingface_modals is presumably a dict of model-name -> model-id defined
# earlier in the file — TODO confirm.
if Main_menu == "Article Summarizer" or Main_menu == "Text Summarizer":
Summarizer_modal = st.selectbox(
label="Select Modal",
options=np.array(list(Hugingface_modals.keys())),
index=1,
key="Modals"
)
#### selecting number or paragraph for article summarizer
# slider controls how many scraped paragraphs are kept (1-10)
if Main_menu == "Article Summarizer":
with app_sidebar:
st.text("")
st.text("")
Number_of_article_paragraph = st.slider(
label="Number of paragraph",
min_value=1,max_value=10,
step=1,value=2,
key="Number of paragraph"
)
with app_sidebar:
st.button(
label="Watch App Tutorial",
use_container_width=True,
on_click=watch_tutorial
)
##### article summarizer functions
##### naive bayes text classification function
# Compiled once at import time so repeated classification calls don't pay
# for pattern setup on every call.  Matches http(s)://… or www.… prefixes.
_URL_PATTERN = re.compile(r'(?:https?://|www\.)\S+')

def is_url(text):
    """Return True if *text* begins with a URL (``http(s)://…`` or ``www.…``).

    Anchored at the start of the string (``re.match``), matching the
    original behaviour: a URL embedded mid-sentence is not detected here —
    that case is handled by the Naive-Bayes classifier instead.
    """
    return bool(_URL_PATTERN.match(text))
# Train a model for text vs URL classification
def train_model():
    """Train and return a tiny Naive-Bayes text-vs-URL classifier.

    Fits a CountVectorizer + MultinomialNB pipeline on a small in-code
    dataset.  Labels: 1 = URL, 0 = plain text.

    Returns:
        The fitted sklearn pipeline, or ``None`` (after showing a
        Streamlit error) if training fails.
    """
    #### dataset (normal text and URLs)
    try:
        data = [
            ('This is a normal sentence.', 'text'),
            ('www.google.com', 'url'),
            ('Check out this website', 'text'),
            ('https://www.example.com', 'url'),
            ('Machine learning is fun', 'text'),
            ('http://openai.com', 'url'),
            ('Python is a great language', 'text'),
        ]
        texts = [sample for sample, _ in data]
        labels = [1 if label == 'url' else 0 for _, label in data]  ## 1 for url, 0 for text
        ##### modal training
        X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)
        model = make_pipeline(CountVectorizer(), MultinomialNB())
        model.fit(X_train, y_train)  #### Train the model
        # (previous code computed train/test accuracy and discarded both
        # results; the no-op score calls have been removed)
        return model
    except Exception as e:
        # st.error takes a single message body; the old call passed the
        # exception as an extra positional argument, which is itself a
        # TypeError — interpolate it into the message instead.
        st.error(f"Error...\n\n{e}", icon="⚠️")
############################### article summarizer
############################### article summarizer
# Page layout: a wide centre column flanked by two empty spacer columns,
# then the page header and the search/URL input box.
if Main_menu == "Article Summarizer":
blank_article1, article_column, blank_article2 = st.columns([2,8,2],gap="small")
with blank_article1: ### blank space
pass
with blank_article2: ### blank space
pass
#### main app column
with article_column:
#### app title
st.text("")
App_Title = colored_header(
label="Web Article Summarizer 📑",
color_name="blue-green-70",
description="Search or paste url"
)
# free-text input: either a topic keyword (-> Wikipedia scrape) or a URL
Text_input = st.text_input(
label="Search or paste url",
placeholder="machine learning, java url- https://www.example.com"
)
### max slider value
def max_length_slider_value(max_length)->int:
    """Map the sidebar paragraph count (1-10) to the max-length slider ceiling.

    Args:
        max_length: number of paragraphs selected in the sidebar slider.

    Returns:
        The slider's ``max_value`` for that paragraph count, or ``None``
        for values outside 1-10 (matching the fall-through of the
        original if/elif chain).
    """
    # table lookup instead of a ten-branch elif chain
    ceiling_by_paragraphs = {
        1: 90, 2: 150, 3: 250, 4: 380, 5: 470,
        6: 600, 7: 750, 8: 900, 9: 1200, 10: 1360,
    }
    return ceiling_by_paragraphs.get(max_length)
@st.cache_data
def Default_max_length(default_value):
    """Pick a random default for the max-length slider, scaled to the
    number of paragraphs selected (1-10).

    Draws six candidates uniformly from a per-level range and returns one
    of them.  ``st.cache_data`` keeps the chosen default stable for a
    given input across reruns.

    Returns:
        A random int from the level's range, or ``None`` for values
        outside 1-10 (matching the original fall-through).
    """
    # (low, high) sampling range per paragraph-count level — same bounds
    # as the original ten-branch elif chain.
    ranges = {
        1: (30, 65), 2: (50, 130), 3: (70, 210), 4: (140, 310),
        5: (200, 390), 6: (230, 490), 7: (280, 590), 8: (350, 750),
        9: (450, 1050), 10: (560, 1100),
    }
    bounds = ranges.get(default_value)
    if bounds is None:
        return None
    low, high = bounds
    random_value = np.random.randint(low, high, 6)
    return random.choice(random_value)
# Control row: generate button | summarize-mode toggle | model level badge.
Button_column, Toggle_summary_btn, Modal_display = st.columns([1,1,3],gap="small")
# article_summarizer(max_length)
with Button_column:
### generate article button
Generate_btn = st.button(label="Generate Article")
with Toggle_summary_btn:
### if on then it generates summary
# toggle off = scrape only; on = scrape + run the HF summarizer
summary_on = st.toggle(
label="Summarizer",
value=False,
key="Summarizer on off"
)
if summary_on:
st.toast(body="Summarizer Mode on",icon="📑")
else:
st.toast(body="Scraping Mode",icon="📰")
with Modal_display:
if summary_on:
Modal_Level(Summarizer_modal)
else:
pass
# max-token slider only shown in summarizer mode; its ceiling and default
# both scale with the paragraph-count sidebar slider.
if summary_on:
max_length_article = st.slider(
label="max length",
min_value=10,max_value=max_length_slider_value(Number_of_article_paragraph),
key="max length",value=Default_max_length(Number_of_article_paragraph)
)
################################################################################################
### article scraper function
def article_scraper(article_url):
"""Scrape a web article with newspaper3k, render it on the page and
(optionally) summarize it.

Downloads and parses *article_url*, shows title / publish date /
authors, keeps the top N paragraphs (sidebar slider), cleans the
text, offers copy/download buttons, and — when the Summarizer toggle
is on — runs the selected Hugging Face model and shows summary stats.
On any failure it renders 404 Lottie animations plus a warning.

NOTE(review): several ``st.markdown("`` calls below lost their HTML
payload (the string literal was truncated) — the original markup must
be restored before this runs.
"""
try:
article = Article(article_url) ### article object
article.download()
article.parse()
nltk.download("punkt")
article.nlp()
st.markdown("
Article
",unsafe_allow_html=True)
st.text("")
st.text("")
st.markdown( ### article title
f"""
{article.title}
""",unsafe_allow_html=True
)
article_publishdate = article.publish_date ### article publish date
# newspaper3k returns None when no publish date could be parsed
if article_publishdate == None:
pass
else:
st.text("published on - "+str(article_publishdate))
article_authors = article.authors #### article authors
if len(article_authors) == 0:
pass
else:
autho_name_print = ", ".join(map(str, article_authors))
st.write(autho_name_print)
### generating article summary
def get_top_paragraphs(text, num_paragraphs=Number_of_article_paragraph):
"""Return the first *num_paragraphs* paragraphs of *text* that are
longer than 12 words, joined by blank lines."""
paragraphs = text.split('\n\n')
valid_paragraphs = [p.strip() for p in paragraphs if len(p.strip().split()) > 12]
top_paragraphs = valid_paragraphs[:num_paragraphs]
return '\n\n'.join(top_paragraphs)
article_summary = article.text
def remove_bracketed_numbers(text)->str:
# strip Wikipedia-style citation markers such as "[12]"
pattern = r'\[\d+\]'
cleaned_text = re.sub(pattern, '', text)
return cleaned_text
cleaned_article_text = remove_bracketed_numbers(get_top_paragraphs(article_summary))
# stash the cleaned text in session state for reruns
if "clean_text" not in st.session_state:
st.session_state.clean_text = ""
st.session_state.clean_text = cleaned_article_text
def clean_output_text(text:str)->str:
"""Normalise *text*: fix unicode, fold to ASCII, drop emojis,
keep (at most double) line breaks."""
clean_text = clean(
text=text,fix_unicode=True,
to_ascii=True,no_emoji=True,
lang="en",no_line_breaks=False,
keep_two_line_breaks=True
)
return clean_text
### Print the cleaned text
st.write(clean_output_text(st.session_state.clean_text))
st.text("")
st.text("")
### copy download button
Article_filename = f"{article.title}.doc"
Article_text_format = f"""
\n\n\n
{str(article.title)}
published on - {str(article_publishdate)}
Authors - {", ".join(map(str, article_authors))}
\n\n\n
{str(cleaned_article_text)}
"""
if __name__=="__main__":
Copy_download_button(
article_text=clean_output_text(cleaned_article_text),
article_format=Article_text_format,
article_file_name=Article_filename
)
st.text("")
# summarizer-mode branch: run the selected HF model on the cleaned text
if summary_on:
st.markdown("
Article Summary
",unsafe_allow_html=True)
#### summarization modal
with st.spinner("Generating Summary..."):
if __name__=="__main__":
summarized_article_text = Hugingface_summarization_modal(
summary_text=clean_output_text(cleaned_article_text),
modal_name=Summarizer_modal,
maximum_length=max_length_article
)
#### clean ai generated paragraph
st.write(summarized_article_text)
st.text("")
st.text("")
summary_format = f"""
\n\n
{article.title}
\n\n\n
{summarized_article_text}
"""
#### copy or download summary button
if __name__=="__main__":
Copy_download_button(
article_text=summarized_article_text,
article_file_name=f"{article.title}-summary.doc",
article_format=summary_format
)
# stats table: model used, input/output word counts, token budget
if summary_on:
### summarization details
summarization_details = {
"Summarization Details":["Modal Name","Text Length","Summary Length","Max Tokens"],
"Output":[
f"{Summarizer_modal}",
f"Length - {len(cleaned_article_text.split())}",
f"Length - {len(summarized_article_text.split())}",
f"Tokens Used - {max_length_article}"
]
}
summarization_details_df = pd.DataFrame(
data=summarization_details,
index=["Hugingface Modal","No. words","No. Words","Max Length"]
)
st.text("")
st.text("")
st.text("")
st.dataframe(summarization_details_df,use_container_width=True)
except Exception as err:
### 404 error animation
Error_404_col, page_not_found_col = st.columns(2)
with Error_404_col:
try:
Error_404 = insert_lottie_animation("lottie_animations/error-404.json")
st_lottie(
animation_source=Error_404,
speed=1,
reverse=False,loop=True,
quality="high",
height=315,
width=400,
key="404 error"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
with page_not_found_col:
try:
page_not_found = insert_lottie_animation("lottie_animations/page-not-found.json")
st_lottie(
animation_source=page_not_found,
speed=1,
reverse=False,loop=True,
quality="high",
height=265,
width=400,
key="page not found"
)
except Exception as err:
# NOTE(review): same st.warning positional-argument bug as above.
st.warning("something went wrong...",err,icon="⚠️")
st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
def article_summarizer(summary_length):
"""Debug stub: echo the requested summary length to the page."""
st.write(summary_length)
def check_url_exists(url):
    """Return True if *url* answers a HEAD request with a non-error status.

    Follows redirects.  Any network failure (connection error, timeout,
    invalid URL) is treated as "does not exist".
    """
    try:
        # timeout added so an unresponsive host can't hang the Streamlit
        # script run indefinitely
        response = requests.head(url, allow_redirects=True, timeout=10)
        return response.status_code < 400
    except requests.exceptions.RequestException:
        # Handle any exception (e.g., connection error, timeout)
        return False
########### link classified article
def link_classified(text):
"""Handle input classified as a URL: scrape and render the article at
*text*, then show a "Visit Article" link when the URL is reachable.

NOTE(review): the ``st.markdown("`` call near the end lost its HTML
payload (truncated string literal) — restore the markup before running.
"""
try:
url_text = text
article_url_link = f"{url_text}" ### url to scrap
if __name__=="__main__":
article_scraper(article_url_link)
st.text("")
st.text("")
# only offer the outbound link if a HEAD request succeeds
if check_url_exists(article_url_link):
st.link_button(label="Visit Article",url=(article_url_link))
else:
st.warning("Url does not exist...",icon="⚠️")
st.text("")
st.text("")
st.text("")
st.markdown("
Created by Nishant Maity
",unsafe_allow_html=True)
except Exception as err:
st.warning(f"Something went wrong...\n\n{err}",icon="⚠️")
####$ text classified article
def text_classified(text):
"""Handle input classified as plain text: turn it into a Wikipedia URL
(spaces -> underscores) and scrape/render that article.

NOTE(review): the ``st.markdown("`` call near the end lost its HTML
payload (truncated string literal) — restore the markup before running.
"""
try:
url_text = text.replace(" ","_")
article_url = f"https://en.wikipedia.org/wiki/{url_text}" ### url to scrap
if __name__=="__main__":
article_scraper(article_url)
st.text("")
st.text("")
if check_url_exists(article_url):
st.link_button(label="Visit Article",url=article_url)
else:
st.warning("Url does not exist...",icon="⚠️")
st.text("")
st.text("")
st.text("")
st.markdown("
Created by Nishant Maity
",unsafe_allow_html=True)
except Exception as e:
# NOTE(review): st.warning takes one body string — passing `e` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("Something went wrong...",e,icon="⚠️")
############################################################################################
### j query animation
# Idle state: show the particles animation until the user submits input.
if not Generate_btn or Text_input.strip() == "":
try:
def particle(Js_file):
# embed the particles HTML/JS file as a Streamlit component
with open(Js_file) as f:
component.html(f"{f.read()}", height=420)
if __name__=="__main__":
particle("animation/particles.html")
except Exception as e:
# NOTE(review): st.error takes one body string — passing `e` as a second
# positional argument is a TypeError; should be an f-string.
st.error("Something went wrong...\n\n",e)
# Submit path: classify the input as URL vs topic text and dispatch.
if Generate_btn:
if Text_input.strip() != "":
st.text("")
st.text("")
### Function to classify the input text
def classify_input(text, model):
try:
# fast path: regex says it's a URL
if is_url(text):
link_classified(text)
else:
#### If it's not detected as a URL
# fall back to the Naive-Bayes classifier
prediction = model.predict([text])[0]
if prediction == 1:
link_classified(Text_input)
else:
text_classified(Text_input)
except Exception as e:
# NOTE(review): same st.error positional-argument bug as above.
st.error("Error...\n\n",e,icon="⚠️")
with st.spinner("Generating Article..."):
if __name__=="__main__":
model = train_model()
classify_input(Text_input, model)
################################# Text summarizer
################################# Text summarizer
# Page layout: wide centre column with header, text area input, model badge,
# and an idle "write here" animation until text is entered.
if Main_menu == "Text Summarizer":
blank_text_sum1, text_summarizer_col, blank_text_sum2 = st.columns([2,8,2],gap="small")
### blank columns
with blank_text_sum1:
pass
with blank_text_sum2:
pass
### text summarizer app column
with text_summarizer_col:
#### app title
st.text("")
text_summarizer_Title = colored_header(
label="Text Summarizer 📄",
color_name="violet-70",
description="enter or paste text hear"
)
placeholder_text = """write or paste your text hear
paragraph length should be greater then 30 words
to generate output tap on screen or press ctrl+enter
"""
### input box
text_summarizer_input = st.text_area(
label="Enter Text Hear",
placeholder=placeholder_text,
height=340,
key="text summarizer"
)
Modal_Level(Summarizer_modal)
# empty input -> idle animation
if text_summarizer_input.strip() == "":
try:
#### writing animation
write_hear_animation = insert_lottie_animation("lottie_animations/write-hear.json")
st_lottie(
animation_source=write_hear_animation,
speed=1,
reverse=False,loop=True,
quality="medium",
height=165,
width=240,
key="write hear"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
### enter paragraph length greater than 35 words
# NOTE(review): the check uses 20 words but the message says 35 — one of
# the two should be corrected.
elif len(text_summarizer_input.split()) < 20:
st.warning("paragraph should be greater than 35 words",icon="✏️")
else:
def word_token_maxvalue(text:str)->int:
    """Return the number of word tokens in *text* (nltk tokenisation).

    Used as the ceiling of the text-summarizer max-length slider.
    """
    # word_tokenize already returns a list; the previous element-by-element
    # copy into a second list added nothing
    return len(word_tokenize(text))
@st.cache_data
def random_value_text(text:str)->int:
    """Pick a random default (between 10 and the token count of *text*)
    for the max-length slider; cached so it is stable per input."""
    candidates = np.random.randint(10, word_token_maxvalue(text), 6)
    return random.choice(candidates)
def clean_data_for_summarization(text:str)->str:
    """Normalise *text* for the summarizer: fix unicode, fold to ASCII,
    strip emojis, keep (at most double) line breaks."""
    return clean(
        text=text,
        fix_unicode=True,
        to_ascii=True,
        no_emoji=True,
        lang="en",
        no_line_breaks=False,
        keep_two_line_breaks=True,
    )
# Valid input path: max-length slider (bounded by the input's token count),
# generate button, spinner animation, then model call + output rendering.
text_Max_length = st.slider(
label="Max length",
min_value=10,
max_value=word_token_maxvalue(text_summarizer_input),
key="text summarizer max length",
step=1,value=random_value_text(text_summarizer_input)
)
Generate_text_summary = st.button(
label="Generate summary",key="text summary"
)
try:
#### writing loading
writing_loading_animation = insert_lottie_animation("lottie_animations/writing-loading.json")
summary_generating_animation = st_lottie_spinner(
animation_source=writing_loading_animation,
speed=2,
reverse=False,loop=True,
quality="medium",
height=165,
width=240,
key="writing generating"
)
except Exception as err:
# NOTE(review): st.warning takes one body string — passing `err` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("something went wrong...",err,icon="⚠️")
#### initilization of modal
if Generate_text_summary:
if __name__=="__main__":
##### summary generation
with summary_generating_animation:
### modal
Text_Summary_output = Hugingface_summarization_modal(
summary_text=clean_data_for_summarization(text_summarizer_input),
modal_name=Summarizer_modal,
maximum_length=text_Max_length
)
##### summary displaying and copy
st.text("")
st.text("")
# NOTE(review): this st.markdown lost its HTML payload (truncated string
# literal) — restore the heading markup before running.
st.markdown("
Generated Summary
",unsafe_allow_html=True)
st.text("")
st.write(Text_Summary_output)
st.text("")
copy_text(Text_Summary_output)
st.text("")
st.text("")
###### original text desplay and copy
# NOTE(review): truncated st.markdown payload here as well.
st.markdown("
",unsafe_allow_html=True)
##############################################################################################################
############################## pdf summarizer
#### pdf and text summarizer functions
#### displaying uploaded pdf file
def display_pdf_file(uploaded_file):
"""Persist the uploaded PDF under ``data/`` and embed it on the page.

NOTE(review): ``pdf_display`` below is an empty f-string — the original
iframe/embed HTML (built from ``base64_pdf``) was lost and must be
restored, otherwise nothing is rendered.
"""
#### saving the uploaded file
def save_uploadfile(save_file):
# write the raw upload buffer to data/<name>
with open(os.path.join("data",save_file.name),"wb") as f:
f.write(save_file.getbuffer())
return st.toast("file uploaded: {}".format(save_file.name))
try:
### display pdf on screen
def displayPDF(pdf_file):
# base64-encode the saved file for inline embedding
with open(pdf_file,"rb") as f:
base64_pdf = base64.b64encode(f.read()).decode("utf-8")
pdf_display = f"""
"""
st.markdown(pdf_display,unsafe_allow_html=True)
### save and display file
save_uploadfile(uploaded_file)
pdf_file = "data/"+uploaded_file.name
displayPDF(pdf_file)
except Exception as e:
# NOTE(review): st.warning takes one body string — passing `e` as a
# second positional argument is a TypeError; should be an f-string.
st.warning("Something Went wrong...\n\n",e,icon="⚠️")
#### Function to extract text from a specific page using pdfminer
def extract_text_pdfminer(pdf_file, page_number):
    """Extract the text of one page (1-based *page_number*) of *pdf_file*.

    Rebuilds each text line character-by-character from pdfminer layout
    objects so the output stays close to the page's visual layout.

    Returns:
        The extracted page text (one stripped line per ``\\n``), or the
        result of a Streamlit warning when *page_number* is out of range
        or extraction fails.
    """
    try:
        for i, page_layout in enumerate(extract_pages(pdf_file)):
            if i != page_number - 1:
                continue
            ### Extract text elements and format them as closely as possible to the original layout
            lines = []
            for element in page_layout:
                if isinstance(element, LTTextContainer):
                    for text_line in element:
                        if isinstance(text_line, LTTextLine):
                            line = ''.join(char.get_text() for char in text_line if isinstance(char, LTChar))
                            lines.append(line.strip())
            # join instead of quadratic += string building
            return ''.join(l + '\n' for l in lines)
        # loop exhausted: the requested page does not exist
        return st.warning("Invalid page number.",icon="⚠️")
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
###############################################
##### clean text for summmarization task
def uploaded_Clean_Text_Summarization(clean_text:str)->str:
    """Return *clean_text* normalised for the summarization task.

    Strips the characters ``| ` ~ ^ $ < >`` and then runs cleantext's
    ``clean`` (unicode fix, ASCII fold, emoji removal, line breaks kept).

    Returns an empty string (after a Streamlit warning) on failure.
    """
    try:
        pattern = r'[|`~^$<>]'
        cleaned_paragraph = re.sub(pattern, '', clean_text)
        ### using clean function
        # return inside the try: the old code returned `clean_output_para`
        # *after* the except block, which raised NameError whenever clean()
        # failed (the variable was never bound)
        return clean(
            text=cleaned_paragraph,fix_unicode=True,
            to_ascii=True,no_emoji=True,
            lang="en",no_line_breaks=False,
            keep_two_line_breaks=True
        )
    except Exception as e:
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
        return ""
### convert paragraph into tokens
def generate_text_para_tokens(text_para:str)->int:
    """Count word tokens in *text_para* after cleaning.

    Strips ``| ` ~ # ^ $ < >``, normalises with cleantext's ``clean``,
    then tokenises with nltk.  Used as the ceiling of the PDF-summarizer
    max-length slider.

    Returns ``None`` (after a Streamlit warning) on failure.
    """
    try:
        pattern = r'[|`~#^$<>]'
        cleaned_paragraph = re.sub(pattern, '', text_para)
        #### using clean function
        clean_para = clean(
            text=cleaned_paragraph,fix_unicode=True,
            to_ascii=True,no_emoji=True,
            lang="en",no_line_breaks=False,
            keep_two_line_breaks=True
        )
        # word_tokenize already yields the token list; the old copy loop
        # plus np.array wrapper before len() added nothing
        return len(word_tokenize(clean_para))
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
### generates random value for slider
@st.cache_data
def random_text_para_value(para:str)->int:
    """Pick a random default (between 20 and the token count of *para*)
    for the PDF-summarizer max-length slider; cached per input.

    Returns ``None`` (after a Streamlit warning) on failure.
    """
    try:
        random_value = np.random.randint(
            20, generate_text_para_tokens(para), 6
        )
        return random.choice(random_value)
    except Exception as e:
        # st.warning takes a single body string; the old call passed the
        # exception as an invalid extra positional argument
        st.warning(f"Something Went wrong...\n\n{e}", icon="⚠️")
#### PDF files summarizer
def process_pdf(file):
"""Render the uploaded PDF in two tabs: a viewer tab and a summarizer
tab with whole-document or per-page text extraction plus clean-up and
summary controls.

NOTE(review): several ``st.markdown`` calls below lost their HTML
payload (truncated string literals) — restore the markup before running.
"""
reader = PdfReader(file)
page_count = len(reader.pages)
### pdf display and information column
pdf_display_tab, pdf_summarizer_tab = st.tabs([f"Displaying {file.name}","Pdf Summarizer"])
####### displaying pdf on pdf display tab
with pdf_display_tab:
st.markdown(f"
Pdf - {file.name}
",unsafe_allow_html=True)
pdf_col, pdf_info_col = st.columns([5,3],gap="medium")
with pdf_col:
with st.spinner("Displaying file..."):
if __name__=="__main__":
display_pdf_file(file)
with pdf_info_col:
st.write("Your File: {}".format(file.name))
st.write(f"Number of pages: {str(page_count)}")
st.markdown(insert_html("htmlfiles/pdf-summarizer-info.html"),unsafe_allow_html=True)
### pdf information and intract with pdf
with pdf_summarizer_tab:
st.text("")
st.markdown("
Extract pdf text
",unsafe_allow_html=True)
### toggle button for extracting text
# toggle: whole-document extraction vs per-page extraction
extract_by_page_all = st.toggle(
label="Extract whole Text",key="toggle for extract text",
value=False
)
### extracting all pdf text
if extract_by_page_all:
st.write("Extract whole pdf Text")
if st.button("Extract Whole Pdf",key="whole pdf text extract"):
st.text("")
st.text("")
with st.spinner("Extracting pdf..."):
whole_pdf_text = extract_text(file)
st.markdown("
Whole PDF Text
",unsafe_allow_html=True)
st.text("")
st.write(whole_pdf_text)
else:
# per-page branch: pick a page, extract it, keep the (editable) text
# in session state so it survives Streamlit reruns
reader = PdfReader(file)
total_pages = len(reader.pages)
st.write("Extract by page Number")
pdf_page_no_col, pdf_page_noinfo_col = st.columns([3,5],gap="small")
with pdf_page_no_col:
### input page number
Pdf_page_number_input = st.number_input(
label="Select the page number",
min_value=1, max_value=total_pages,
value=1,key="pdf page number",step=1
)
with pdf_page_noinfo_col:
st.text("")
st.text("")
st.write(f"Selected page: {str(Pdf_page_number_input)}")
Extract_page_no_button = st.button(
label="Extract Page text",
key="Extract button for page"
)
st.text("")
st.text("")
if Extract_page_no_button:
text_pdfminer = extract_text_pdfminer(file, Pdf_page_number_input)
st.session_state['extracted_text'] = text_pdfminer ### Store the extracted text in session state
if 'extracted_text' in st.session_state:
Pdf_file_text = st.text_area(
label=f"Text data of {Pdf_page_number_input} page",
value= st.session_state['extracted_text'],
height=400
)
st.session_state['extracted_text'] = Pdf_file_text # Update the text in session state based on user's input
#### pdf summarizer
st.text("")
Max_length_pdf_slider = st.slider(
label="Max Length",key="Pdf summarizer slider",
min_value=10,max_value=generate_text_para_tokens(Pdf_file_text),
value=random_text_para_value(Pdf_file_text)
)
st.text("")
upload_Pdf_summary_btn_col, upload_Pdf_print_btn_col, upload_clean_Pdf_print_btn_col, blank_Pdf_col1, blank_Pdf_col2 = st.columns(
[4,4,4,7,3],gap="small"
)
with blank_Pdf_col1:
pass
with blank_Pdf_col2:
pass
with upload_Pdf_summary_btn_col:
Generate_upload_pdf_summary_btn = st.button(
label="Generate Summary",
key="Generate summary of uploaded text pdf"
)
with upload_clean_Pdf_print_btn_col:
Upload_clean_pdf_btn = st.button(
label="Print Clean Text",
key="Print clean pdf file"
)
with upload_Pdf_print_btn_col:
upload_pdf_print_button = st.button(
label="Print Uploaded Text",
key="Print uploadded pdf"
)
### clean text
if Upload_clean_pdf_btn:
with st.spinner("Generating Clean Text..."):
st.text("")
st.text("")
st.markdown("
",unsafe_allow_html=True)
# PDF Summarizer page: upload a PDF or text file and dispatch on MIME type.
if Main_menu == "PDF Summarizer":
### blank and app columns
Blank_pdf1 ,pdf_summarizer_col, Blank_pdf2 = st.columns([1,8,1],gap="small")
with Blank_pdf1:
pass
with Blank_pdf2:
pass
with pdf_summarizer_col:
st.text("")
st.header("PDF Summarizer") ### app heading
### File uploader function
app_file_upload = st.file_uploader("Upload a PDF or Text file", type=["pdf", "txt"])
if app_file_upload is not None:
### if pdf file
if app_file_upload.type == "application/pdf":
if __name__=="__main__":
process_pdf(app_file_upload)
#### if text file
elif app_file_upload.type == "text/plain":
if __name__=="__main__":
# process_text is presumably defined elsewhere in the file — TODO confirm
process_text(app_file_upload)
else:
st.info("Upload your pdf, text file")
#### app info
# App Info page: static HTML rendered from a template file.
if Main_menu == "App Info":
Blank_app_info1, App_info_col, Blank_app_info2 = st.columns([2,8,2])
#### blank columns
with Blank_app_info1:
pass
with Blank_app_info2:
pass
### app info column
with App_info_col:
st.text("")
st.header("App Info")
st.text("")
if __name__=="__main__":
st.markdown(insert_html("htmlfiles/app-info.html"),
unsafe_allow_html=True
)