# Helper functions for the Streamlit app: copy-button component,
# paragraph scraping, and text cleaning.
import re
from typing import List

import requests
import streamlit as st
import streamlit.components.v1 as component
from bs4 import BeautifulSoup
from cleantext import clean

# Seconds to wait for the remote server before giving up; without a timeout
# requests.get() can block the Streamlit script indefinitely.
_REQUEST_TIMEOUT_SECONDS = 10

# Compiled once at import time instead of on every Text_Cleaning() call.
_STRAY_MARKS_RE = re.compile(r'[`^]')
_BRACKETED_NUMBER_RE = re.compile(r'\[\d+\]')


def Copy_Text(text):
    """Render the copy-button HTML component for ``text``.

    The text is interpolated into an HTML template and handed to
    ``streamlit.components.v1.html`` with a 60x60 frame.

    Args:
        text: The text to embed in the component.
    """
    # NOTE(review): ``text`` is inserted into the template unescaped. If it
    # can contain markup (e.g. scraped content), confirm whether it should be
    # escaped for the context it lands in.
    # NOTE(review): the template below contains no tags or script as it
    # stands -- confirm the button markup is intact in the real file.
    Html_Code = f""" Copy Button
Copied!

{text}

"""
    component.html(Html_Code, height=60, width=60)


def scrape_paragraphs(url, num_paragraphs, *, timeout=_REQUEST_TIMEOUT_SECONDS) -> List[str]:
    """Fetch ``url`` and return the text of its first ``<p>`` elements.

    Args:
        url: Address of the page to download.
        num_paragraphs: Upper bound used to slice the list of ``<p>`` tags.
        timeout: Seconds passed to ``requests.get`` as its timeout. Added so
            an unresponsive host cannot hang the app; existing callers get
            the default.

    Returns:
        A list with the text of each selected paragraph. An empty list is
        returned when the response status is not 200 or when any exception
        is raised (in the latter case a Streamlit warning is shown as well).
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.text, 'lxml')
        return [p.get_text() for p in soup.find_all('p')[:num_paragraphs]]
    except Exception as e:
        # Best-effort at the UI boundary: surface the problem to the user
        # and fall back to "no paragraphs" rather than crashing the page.
        st.warning(f"Error...\n{e}", icon="⚠️")
        return []


### text cleaning
def Text_Cleaning(text: str) -> str:
    """Return a cleaned-up version of ``text`` that is easier to read.

    Steps, in order:
      1. Remove backtick and caret characters.
      2. Run ``cleantext.clean`` with unicode fixing and ASCII
         transliteration enabled, keeping line breaks (double line breaks
         are preserved).
      3. Remove bracketed numbers such as ``[12]``.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned text.
    """
    without_marks = _STRAY_MARKS_RE.sub('', text)
    normalized = clean(
        text=without_marks,
        fix_unicode=True,
        to_ascii=True,
        no_line_breaks=False,
        keep_two_line_breaks=True,
    )
    return _BRACKETED_NUMBER_RE.sub('', normalized)