Spaces:

slimshadow
/

durgu-notice

Paused

App Files Files Community

slimshadow committed on Nov 29, 2024

Commit

6df57b1

verified ·

1 Parent(s): df3f93a

Create app.py

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+# Function to scrape the notices from the website
+def scrape_notices():
+    # URL of the notice list page
+    url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
+    # Send a GET request to fetch the raw HTML content
+    response = requests.get(url)
+    # Check if the request was successful (status code 200)
+    if response.status_code != 200:
+        st.error(f"Failed to retrieve content. Status code: {response.status_code}")
+        return []
+    # Parse the HTML content using BeautifulSoup
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # Find all rows or columns containing notice information
+    rows = soup.find_all('td')
+    # Initialize a list to hold notice name and links
+    notice_list = []
+    seen_links = set()  # Set to track links we've already encountered
+    # Iterate through all <td> tags and extract notice name and link
+    for row in rows:
+        # Find the anchor tag within the <td>
+        link_tag = row.find('a')
+        # Check if there's a valid link
+        if link_tag:
+            # Extract the name (text) and link (href)
+            notice_name = link_tag.text.strip()
+            notice_link = link_tag.get('href')
+            # Handle cases where the name is missing or the text is just a link
+            if not notice_name:
+                notice_name = f"Notice {len(notice_list) + 1}"  # Fallback if there's no text in the <a> tag
+            # Check if the link is already in the set of seen links (i.e., duplicate)
+            if notice_link not in seen_links:
+                # If not a duplicate, add the link to the seen set and add notice to the list
+                seen_links.add(notice_link)
+                notice_list.append({'name': notice_name, 'link': notice_link})
+    return notice_list
+# Streamlit UI
+st.title("Durg University Notices")
+# Display an info message
+st.info("Click the button below to fetch the latest notices.")
+# Button to trigger scraping
+if st.button("Fetch Notices"):
+    with st.spinner('Scraping notices...'):
+        # Get the notice list by calling the scrape_notices function
+        notices = scrape_notices()
+        # Check if there are any notices
+        if notices:
+            # Display notices in a readable format
+            for idx, notice in enumerate(notices[:30]):  # Display first 30 notices
+                st.write(f"**{idx + 1}. {notice['name']}**")
+                st.write(f"[Download PDF]({notice['link']})")
+        else:
+            st.warning("No notices found or failed to scrape.")
+# Footer with some information
+st.sidebar.markdown("""
+    **About**:
+    This app scrapes and displays the latest notices from the Durg University website.
+    Click the button to fetch updated notices.
+""")