# Helper functions: copy-to-clipboard button, paragraph scraping, and text cleaning
import streamlit as st
import requests
from bs4 import BeautifulSoup
import re
from cleantext import clean
import streamlit.components.v1 as component
def Copy_Text(text):
    """
    Render a "Copy Button" that copies ``text`` to the user's clipboard.

    The button is emitted as a small HTML/JavaScript snippet through
    ``streamlit.components.v1.html``. ``text`` is embedded as a JavaScript
    string literal so that quotes, newlines and markup inside it cannot
    break out of the script.

    Args:
        text: Content to place on the clipboard when the button is clicked.
            ``None`` is treated as an empty string; other values are
            converted with ``str()``.

    Returns:
        None. The only effect is the rendered component.
    """
    import json  # local import keeps this block self-contained

    payload = "" if text is None else str(text)
    # json.dumps produces a valid JavaScript string literal; additionally
    # escaping "<" prevents a "</script>" sequence inside the text from
    # terminating the inline script early.
    js_literal = json.dumps(payload).replace("<", "\\u003c")

    # NOTE: navigator.clipboard needs a secure context (HTTPS or localhost)
    # and clipboard-write permission for the component's iframe.
    Html_Code = f"""
<button id="copy-button" type="button">Copy Button</button>
<script>
const textToCopy = {js_literal};
document.getElementById("copy-button").addEventListener("click", () => {{
    navigator.clipboard.writeText(textToCopy);
}});
</script>
"""
    component.html(Html_Code,height=60,width=60)
def scrape_paragraphs(url, num_paragraphs, timeout=10):
    """
    Download ``url`` and return the text of its leading ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        num_paragraphs: Upper bound on the number of paragraphs returned;
            applied as a slice over the ``<p>`` tags found in the page.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A list with the text of each selected paragraph. The list is empty
        when the response status is not 200, or when any error occurs (in
        which case a Streamlit warning is displayed as well).
    """
    try:
        # Without a timeout, requests can wait forever on an unresponsive
        # host, which would freeze the Streamlit script run.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return []
        soup = BeautifulSoup(response.text, 'lxml')
        return [p.get_text() for p in soup.find_all('p')[:num_paragraphs]]
    except Exception as e:
        # Intentionally broad: this is a UI boundary, so any failure
        # (network, parser, bad URL) is reported to the user instead of
        # crashing the app.
        st.warning(f"Error...\n{e}",icon="⚠️")
        return []
### text cleaning
def Text_Cleaning(text:str)->str:
    """
    Return a cleaned-up version of ``text``.

    Three passes are applied in order:
      1. backtick and caret characters are removed;
      2. the result is passed through ``cleantext.clean`` with
         ``fix_unicode`` and ``to_ascii`` enabled, line breaks kept, and
         ``keep_two_line_breaks`` enabled;
      3. bracketed numeric markers such as "[12]" are removed.
    """
    without_marks = re.sub(r'[`^]', '', text)

    normalized = clean(
        text=without_marks,
        fix_unicode=True,
        to_ascii=True,
        no_line_breaks=False,
        keep_two_line_breaks=True,
    )

    # Strip "[<digits>]" markers left in the normalized text.
    return re.sub(r'\[\d+\]', '', normalized)