Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from pydantic import BaseModel
|
| 3 |
-
import
|
| 4 |
from bs4 import BeautifulSoup
|
| 5 |
|
| 6 |
app = FastAPI()
|
|
@@ -14,44 +14,33 @@ url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
|
|
| 14 |
|
| 15 |
@app.get("/notices", response_model=list[Notice])
|
| 16 |
async def get_notices():
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
if response.status_code != 200:
|
| 22 |
return {"error": f"Failed to retrieve content. Status code: {response.status_code}"}
|
| 23 |
|
| 24 |
-
# Parse the HTML content using
|
| 25 |
-
soup = BeautifulSoup(response.text, '
|
| 26 |
|
| 27 |
# Find all rows or columns containing notice information
|
| 28 |
-
rows = soup.
|
| 29 |
|
| 30 |
# Initialize a list to hold notice name and links
|
| 31 |
notice_list = []
|
| 32 |
-
seen_links = set() #
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
for
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
notice_name
|
| 43 |
-
notice_link = link_tag.get('href')
|
| 44 |
-
|
| 45 |
-
# Handle cases where the name is missing or the text is just a link
|
| 46 |
-
if not notice_name:
|
| 47 |
-
notice_name = f"Notice {len(notice_list) + 1}" # Fallback if there's no text in the <a> tag
|
| 48 |
-
|
| 49 |
-
# Check if the link is already in the set of seen links (i.e., duplicate)
|
| 50 |
-
if notice_link not in seen_links:
|
| 51 |
-
# If not a duplicate, add the link to the seen set and add notice to the list
|
| 52 |
-
seen_links.add(notice_link)
|
| 53 |
-
notice_list.append({'name': notice_name, 'link': notice_link})
|
| 54 |
-
|
| 55 |
-
# Return the list of notices as JSON
|
| 56 |
-
return notice_list[:50] # Return top 30 notices
|
| 57 |
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from pydantic import BaseModel
|
| 3 |
+
import httpx # Asynchronous HTTP client
|
| 4 |
from bs4 import BeautifulSoup
|
| 5 |
|
| 6 |
app = FastAPI()
|
|
|
|
| 14 |
|
| 15 |
@app.get("/notices", response_model=list[Notice])
async def get_notices():
    """Scrape the notice page at ``url`` and return up to 50 unique notices.

    The page is fetched asynchronously with httpx and parsed with
    BeautifulSoup using the ``lxml`` parser. Every ``<a>`` tag nested in a
    ``<td>`` is treated as a notice; notices are de-duplicated by their
    ``href`` value, keeping the first occurrence in document order.

    Returns:
        A list of at most 50 dicts with the keys ``name`` and ``link``.
        Anchors without visible text get a positional fallback name
        (``"Notice <n>"``).

    Raises:
        HTTPException: with status 502 when the upstream page cannot be
            reached or answers with a status code other than 200.
    """
    # Imported at function scope so this handler is self-contained and does
    # not rely on the module-level import block being changed.
    from fastapi import HTTPException

    max_notices = 50

    try:
        async with httpx.AsyncClient() as client:
            # Send an asynchronous GET request
            response = await client.get(url)
    except httpx.RequestError as exc:
        # Connection failures and timeouts would otherwise bubble up as an
        # unexplained 500; report them as an upstream (gateway) failure.
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content: {exc}",
        ) from exc

    # Check if the request was successful. The route declares
    # response_model=list[Notice], so returning an error dict here would fail
    # response validation; raise an HTTP error carrying the message instead.
    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content. Status code: {response.status_code}",
        )

    # Parse the HTML content using a faster parser like 'lxml'
    soup = BeautifulSoup(response.text, 'lxml')

    # Directly target <a> tags inside <td>
    rows = soup.select('td a')

    notice_list = []
    seen_links = set()  # Track unique links

    # Extract and de-duplicate notice names and links
    for link_tag in rows:
        notice_link = link_tag.get('href', '').strip()

        # Skip anchors without a target and links that were already collected
        if not notice_link or notice_link in seen_links:
            continue

        seen_links.add(notice_link)
        # Fallback for empty names
        notice_name = link_tag.text.strip() or f"Notice {len(notice_list) + 1}"
        notice_list.append({'name': notice_name, 'link': notice_link})

        # Only the first 50 notices are returned, so stop scanning early
        if len(notice_list) >= max_notices:
            break

    # Return top 50 notices
    return notice_list
|