Files: 2 files loaded === app.py === import streamlit as st import requests from bs4 import BeautifulSoup import re # Function to scrape only visible text from the given URL def scrape_visible_text_from_url(url): try: response = requests.get(url) response.raise_for_status() soup = BeautifulSoup(response.content, 'html.parser') # Remove script, style, and other non-visible tags for tag in soup(["script", "style", "meta", "link", "noscript", "header", "footer", "aside", "nav", "img"]): tag.extract() # Get the header content header_content = soup.find("header") header_text = header_content.get_text() if header_content else "" # Get the paragraph content paragraph_content = soup.find_all("p") paragraph_text = " ".join([p.get_text() for p in paragraph_content]) # Combine header and paragraph text visible_text = f"{header_text}\n\n{paragraph_text}" # Remove multiple whitespaces and newlines visible_text = re.sub(r'\s+', ' ', visible_text) return visible_text.strip() except Exception as e: st.error(f"Error occurred while scraping the data: {e}") return None # Streamlit UI def main(): st.title("Web Data Scraper") # Get the URL from the user url_input = st.text_input("Enter the URL of the web page:", "") if st.button("Scrape Visible Text"): if url_input: # Extract visible text from the URL data = scrape_visible_text_from_url(url_input) if data: st.success("Visible text successfully scraped!") st.subheader("Scraped Text:") st.write(data) else: st.warning("Failed to scrape visible text from the URL.") else: st.warning("Please enter a valid URL.") if __name__ == "__main__": main() === requirements.txt === aiohttp==3.8.5 aiosignal==1.3.1 altair==5.0.1 async-timeout==4.0.2 attrs==23.1.0 beautifulsoup4==4.12.2 blinker==1.6.2 bs4==0.0.1 cachetools==5.3.1 certifi==2023.7.22 charset-normalizer==3.2.0 click==8.1.6 decorator==5.1.1 frozenlist==1.4.0 gitdb==4.0.10 GitPython==3.1.32 idna==3.4 importlib-metadata==6.8.0 Jinja2==3.1.2 jsonschema==4.18.4 jsonschema-specifications==2023.7.1 
markdown-it-py==3.0.0
MarkupSafe==2.1.3
mdurl==0.1.2
multidict==6.0.4
numpy==1.25.2
openai==0.27.8
packaging==23.1
pandas==2.0.3
Pillow==9.5.0
protobuf==4.23.4
pyarrow==12.0.1
pydeck==0.8.0
Pygments==2.15.1
Pympler==1.0.1
python-dateutil==2.8.2
python-dotenv==1.0.0
pytz==2023.3
pytz-deprecation-shim==0.1.0.post0
referencing==0.30.0
requests==2.31.0
rich==13.5.2
rpds-py==0.9.2
six==1.16.0
smmap==5.0.0
soupsieve==2.4.1
streamlit==1.25.0
tenacity==8.2.2
toml==0.10.2
toolz==0.12.0
tornado==6.3.2
tqdm==4.65.0
typing_extensions==4.7.1
tzdata==2023.3
tzlocal==4.3.1
urllib3==2.0.4
validators==0.20.0
watchdog==3.0.0
yarl==1.9.2
zipp==3.16.2