slimshadow committed on
Commit
6df57b1
·
verified ·
1 Parent(s): df3f93a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ # Function to scrape the notices from the website
6
def scrape_notices():
    """Scrape notice titles and links from the Durg University notice list page.

    Fetches the notice list page, inspects the first ``<a>`` inside every
    ``<td>`` cell, and collects one entry per distinct link, in page order.

    Returns:
        A list of ``{'name': str, 'link': str}`` dicts. ``link`` is resolved
        against the page URL, so relative hrefs become absolute. An empty
        list is returned (after reporting through ``st.error``) when the
        request fails or the server answers with a non-200 status.
    """
    # Imported locally so the function brings its own dependency into scope.
    from urllib.parse import urljoin

    # URL of the notice list page
    url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'

    # A timeout keeps the app from hanging forever on an unresponsive server;
    # connection-level failures are reported the same way as bad status codes.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        st.error(f"Failed to retrieve content: {exc}")
        return []

    # Check if the request was successful (status code 200)
    if response.status_code != 200:
        st.error(f"Failed to retrieve content. Status code: {response.status_code}")
        return []

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    notice_list = []
    seen_links = set()  # Links already collected, used to drop duplicates

    for cell in soup.find_all('td'):
        link_tag = cell.find('a')
        if link_tag is None:
            continue

        # An anchor without an href has nothing to download; skip it rather
        # than emitting a link that would render as "None".
        href = link_tag.get('href')
        if not href:
            continue

        # Resolve relative hrefs against the page URL so the links still work
        # when rendered from a different host.
        notice_link = urljoin(url, href.strip())
        if notice_link in seen_links:
            continue
        seen_links.add(notice_link)

        # Fall back to a numbered placeholder when the anchor has no text.
        notice_name = link_tag.text.strip() or f"Notice {len(notice_list) + 1}"
        notice_list.append({'name': notice_name, 'link': notice_link})

    return notice_list
50
+
51
+ # Streamlit UI
52
# Page heading followed by a short usage hint.
st.title("Durg University Notices")
st.info("Click the button below to fetch the latest notices.")

# Scraping happens on demand, only when the button is pressed.
if st.button("Fetch Notices"):
    with st.spinner('Scraping notices...'):
        notices = scrape_notices()

        if not notices:
            st.warning("No notices found or failed to scrape.")
        else:
            # Only the first 30 entries are shown, numbered from 1.
            for position, entry in enumerate(notices[:30], start=1):
                st.write(f"**{position}. {entry['name']}**")
                st.write(f"[Download PDF]({entry['link']})")

# Sidebar blurb describing what the app does.
st.sidebar.markdown("""
**About**:
This app scrapes and displays the latest notices from the Durg University website.
Click the button to fetch updated notices.
""")