# Durg University notice scraper — Streamlit app.
# Fetches the university's notice-list page and displays notice links.
from urllib.parse import urljoin

import requests
import streamlit as st
from bs4 import BeautifulSoup
def scrape_notices():
    """Scrape notice titles and links from the Durg University notice page.

    Returns:
        list[dict]: one dict per unique notice with keys
        ``'name'`` (the anchor text, or a positional fallback such as
        ``"Notice 3"`` when the anchor has no text) and ``'link'``
        (the notice URL, resolved to an absolute URL).
        Returns an empty list when the page cannot be fetched; the error
        is reported to the UI via ``st.error``.
    """
    # URL of the notice list page.
    url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'

    try:
        # A timeout keeps the Streamlit app from hanging indefinitely if
        # the university server is unresponsive (the original call had none).
        response = requests.get(url, timeout=15)
    except requests.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) should
        # fail soft with an empty list, matching the status-code path below.
        st.error(f"Failed to retrieve content: {exc}")
        return []

    # Non-200 responses are reported and treated as "no notices".
    if response.status_code != 200:
        st.error(f"Failed to retrieve content. Status code: {response.status_code}")
        return []

    # Parse the HTML and scan every <td>, since notices live in table cells.
    soup = BeautifulSoup(response.text, 'html.parser')
    rows = soup.find_all('td')

    notice_list = []
    seen_links = set()  # hrefs already collected, used to drop duplicates

    for row in rows:
        link_tag = row.find('a')
        # Skip cells without an anchor, and anchors without an href
        # (the original code would have appended a None link).
        if not link_tag or not link_tag.get('href'):
            continue

        notice_name = link_tag.text.strip()
        # Resolve relative hrefs against the page URL so the markdown
        # links rendered by the UI work outside the site.
        notice_link = urljoin(url, link_tag.get('href'))

        if not notice_name:
            # Fallback title when the <a> tag carries no text.
            notice_name = f"Notice {len(notice_list) + 1}"

        if notice_link not in seen_links:
            seen_links.add(notice_link)
            notice_list.append({'name': notice_name, 'link': notice_link})

    return notice_list
# ---------------------------------------------------------------------------
# Streamlit user interface
# ---------------------------------------------------------------------------
st.title("Durg University Notices")

# Brief instruction shown above the fetch button.
st.info("Click the button below to fetch the latest notices.")

# Scrape on demand rather than on every page load.
if st.button("Fetch Notices"):
    with st.spinner('Scraping notices...'):
        fetched = scrape_notices()

        if fetched:
            # Render at most the first 30 notices, numbered from 1.
            for position, entry in enumerate(fetched[:30], start=1):
                st.write(f"**{position}. {entry['name']}**")
                st.write(f"[Download PDF]({entry['link']})")
        else:
            st.warning("No notices found or failed to scrape.")

# Sidebar blurb describing what the app does.
st.sidebar.markdown("""
**About**:
This app scrapes and displays the latest notices from the Durg University website.
Click the button to fetch updated notices.
""")