# Durg University notice scraper — Streamlit app.
# Fetches the university's notice-list page and displays notice links.
from urllib.parse import urljoin

import requests
import streamlit as st
from bs4 import BeautifulSoup
def scrape_notices():
    """Scrape notice titles and links from the Durg University notice page.

    Returns:
        list[dict]: one dict per unique notice with keys
        ``'name'`` (the anchor text, or a positional fallback such as
        ``"Notice 3"`` when the anchor has no text) and ``'link'``
        (the notice URL, resolved to an absolute URL).
        Returns an empty list when the page cannot be fetched; the error
        is reported to the UI via ``st.error``.
    """
    # URL of the notice list page.
    url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'

    try:
        # A timeout keeps the Streamlit app from hanging indefinitely if
        # the university server is unresponsive (the original call had none).
        response = requests.get(url, timeout=15)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) should
        # fail soft with an empty list, matching the status-code path below.
        st.error(f"Failed to retrieve content: {exc}")
        return []

    # Non-200 responses are reported and treated as "no notices".
    if response.status_code != 200:
        st.error(f"Failed to retrieve content. Status code: {response.status_code}")
        return []

    # Parse the HTML and scan every <td>, since notices live in table cells.
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find_all('td')

    notice_list = []
    seen_links = set()  # hrefs already collected, used to drop duplicates

    for row in rows:
        link_tag = row.find('a')
        # Skip cells without an anchor, and anchors without an href
        # (the original code would have appended a None link).
        if not link_tag or not link_tag.get('href'):
            continue

        notice_name = link_tag.text.strip()
        # Resolve relative hrefs against the page URL so the markdown
        # links rendered by the UI work outside the site.
        notice_link = urljoin(url, link_tag.get('href'))

        if not notice_name:
            # Fallback title when the <a> tag carries no text.
            notice_name = f"Notice {len(notice_list) + 1}"

        if notice_link not in seen_links:
            seen_links.add(notice_link)
            notice_list.append({'name': notice_name, 'link': notice_link})

    return notice_list
# ---------------------------------------------------------------------------
# Streamlit user interface
# ---------------------------------------------------------------------------
st.title("Durg University Notices")

# Brief instruction shown above the fetch button.
st.info("Click the button below to fetch the latest notices.")

# Scrape on demand rather than on every page load.
if st.button("Fetch Notices"):
    with st.spinner('Scraping notices...'):
        fetched = scrape_notices()

        if fetched:
            # Render at most the first 30 notices, numbered from 1.
            for position, entry in enumerate(fetched[:30], start=1):
                st.write(f"**{position}. {entry['name']}**")
                st.write(f"[Download PDF]({entry['link']})")
        else:
            st.warning("No notices found or failed to scrape.")

# Sidebar blurb describing what the app does.
st.sidebar.markdown("""
**About**:
This app scrapes and displays the latest notices from the Durg University website.
Click the button to fetch updated notices.
""")