slimshadow committed on
Commit
0c85842
·
verified ·
1 Parent(s): ca6f80b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -32
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- import requests
4
  from bs4 import BeautifulSoup
5
 
6
  app = FastAPI()
@@ -14,44 +14,33 @@ url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
14
 
15
  @app.get("/notices", response_model=list[Notice])
16
  async def get_notices():
17
- # Send a GET request to fetch the raw HTML content
18
- response = requests.get(url)
19
-
20
- # Check if the request was successful (status code 200)
 
21
  if response.status_code != 200:
22
  return {"error": f"Failed to retrieve content. Status code: {response.status_code}"}
23
 
24
- # Parse the HTML content using BeautifulSoup
25
- soup = BeautifulSoup(response.text, 'html.parser')
26
 
27
  # Find all rows or columns containing notice information
28
- rows = soup.find_all('td')
29
 
30
  # Initialize a list to hold notice name and links
31
  notice_list = []
32
- seen_links = set() # Set to track links we've already encountered
33
 
34
- # Iterate through all <td> tags and extract notice name and link
35
- for row in rows:
36
- # Find the anchor tag within the <td>
37
- link_tag = row.find('a')
38
-
39
- # Check if there's a valid link
40
- if link_tag:
41
- # Extract the name (text) and link (href)
42
- notice_name = link_tag.text.strip()
43
- notice_link = link_tag.get('href')
44
-
45
- # Handle cases where the name is missing or the text is just a link
46
- if not notice_name:
47
- notice_name = f"Notice {len(notice_list) + 1}" # Fallback if there's no text in the <a> tag
48
-
49
- # Check if the link is already in the set of seen links (i.e., duplicate)
50
- if notice_link not in seen_links:
51
- # If not a duplicate, add the link to the seen set and add notice to the list
52
- seen_links.add(notice_link)
53
- notice_list.append({'name': notice_name, 'link': notice_link})
54
-
55
- # Return the list of notices as JSON
56
- return notice_list[:50] # Return top 30 notices
57
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ import httpx # Asynchronous HTTP client
4
  from bs4 import BeautifulSoup
5
 
6
  app = FastAPI()
 
14
 
15
@app.get("/notices", response_model=list[Notice])
async def get_notices():
    """Scrape the university notice board and return up to 50 unique notices.

    Returns:
        A list of ``{'name': str, 'link': str}`` dicts, de-duplicated by link,
        validated against the ``Notice`` response model.

    Raises:
        HTTPException(502): when the upstream page cannot be fetched or does
        not answer with HTTP 200.
    """
    # Local import keeps the module-level import block untouched.
    from fastapi import HTTPException

    async with httpx.AsyncClient() as client:
        try:
            # Asynchronous GET so the event loop is not blocked while waiting.
            response = await client.get(url)
        except httpx.HTTPError as exc:
            # Network-level failure (timeout, DNS, connection refused, ...).
            # Without this, httpx exceptions bubble up as an unhandled 500.
            raise HTTPException(
                status_code=502,
                detail=f"Failed to retrieve content: {exc}",
            ) from exc

    if response.status_code != 200:
        # BUG FIX: the original returned {"error": ...} here, but the route
        # declares response_model=list[Notice]; that dict fails response
        # validation and surfaces as a 500. Raise a proper HTTP error instead.
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content. Status code: {response.status_code}",
        )

    # 'lxml' is a fast C-based parser; it must be installed for BeautifulSoup
    # to use it (it already is a dependency of this app, per the commit).
    soup = BeautifulSoup(response.text, 'lxml')

    # Anchor tags inside table cells carry the notice title and its href.
    notice_list = []
    seen_links = set()  # track hrefs already emitted, to de-duplicate
    for link_tag in soup.select('td a'):
        notice_link = link_tag.get('href', '').strip()
        if not notice_link or notice_link in seen_links:
            continue
        seen_links.add(notice_link)
        # Fallback title when the anchor has no visible text.
        notice_name = link_tag.text.strip() or f"Notice {len(notice_list) + 1}"
        notice_list.append({'name': notice_name, 'link': notice_link})

    # Cap the payload at the first 50 entries (page order).
    return notice_list[:50]