Update app.py
app.py CHANGED
@@ -1,7 +1,9 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, BackgroundTasks
 from pydantic import BaseModel
-import httpx
+import httpx
 from bs4 import BeautifulSoup
+import asyncio
+from datetime import datetime, timedelta
 
 app = FastAPI()
 
@@ -12,35 +14,63 @@ class Notice(BaseModel):
 # URL of the notice list page
 url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
 
-
-
+# Cache to store notices and the last updated timestamp
+cache = {
+    "notices": [],
+    "last_updated": None
+}
+
+CACHE_TTL = timedelta(minutes=5)  # Cache Time-to-Live (5 minutes)
+
+async def fetch_notices():
+    """Fetch notices from the website and update the cache."""
     async with httpx.AsyncClient() as client:
-        # Send an asynchronous GET request
         response = await client.get(url)
-
-        # Check if the request was successful
+
         if response.status_code != 200:
-
+            print(f"Failed to retrieve content. Status code: {response.status_code}")
+            return
 
     # Parse the HTML content using a faster parser like 'lxml'
     soup = BeautifulSoup(response.text, 'lxml')
-
-    # Find all rows or columns containing notice information
     rows = soup.select('td a')  # Directly target <a> tags inside <td>
 
-    #
+    # Process notices
    notice_list = []
-    seen_links = set()
+    seen_links = set()
 
-    # Extract and de-duplicate notice name and links
     for link_tag in rows:
         notice_link = link_tag.get('href', '').strip()
         notice_name = link_tag.text.strip()
 
         if notice_link and notice_link not in seen_links:
             seen_links.add(notice_link)
-            notice_name = notice_name or f"Notice {len(notice_list) + 1}"
+            notice_name = notice_name or f"Notice {len(notice_list) + 1}"
             notice_list.append({'name': notice_name, 'link': notice_link})
 
-    #
-
+    # Update the cache
+    cache["notices"] = notice_list[:50]  # Store top 50 notices
+    cache["last_updated"] = datetime.utcnow()
+    print("Cache updated.")
+
+@app.on_event("startup")
+async def startup_event():
+    """Schedule the cache refresh task on application startup."""
+    asyncio.create_task(cache_refresh_task())
+
+async def cache_refresh_task():
+    """Background task to refresh cache periodically."""
+    while True:
+        await fetch_notices()
+        await asyncio.sleep(CACHE_TTL.total_seconds())
+
+@app.get("/notices", response_model=list[Notice])
+async def get_notices():
+    """Serve notices from the cache."""
+    if cache["last_updated"] and datetime.utcnow() - cache["last_updated"] < CACHE_TTL:
+        # Serve cached data if it's still valid
+        return cache["notices"]
+
+    # If cache is expired or unavailable, fetch notices live
+    await fetch_notices()
+    return cache["notices"]
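One note on the new startup wiring: the commit's @app.on_event("startup") hook works, but FastAPI has deprecated on_event in favor of lifespan handlers, and the refresh task started here is never cancelled on shutdown (the newly imported BackgroundTasks also goes unused in this revision). Below is a minimal sketch of equivalent lifespan-based wiring; the stand-in refresh task and its 300-second sleep are placeholders, not code from the commit.

# Sketch only: lifespan-style alternative to @app.on_event("startup").
import asyncio
from contextlib import asynccontextmanager

from fastapi import FastAPI

async def cache_refresh_task():
    # Stand-in for the diff's refresh loop; substitute the real one.
    while True:
        await asyncio.sleep(300)

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Start the periodic cache refresh on startup...
    task = asyncio.create_task(cache_refresh_task())
    yield
    # ...and cancel it cleanly on shutdown (on_event never stops it).
    task.cancel()

app = FastAPI(lifespan=lifespan)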
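For completeness, a small client-side smoke test for the cached endpoint. It assumes the file is saved as app.py and served locally with uvicorn app:app on the default port; both are assumptions, not part of the commit.

# Hypothetical smoke test for GET /notices (run `uvicorn app:app` first).
import asyncio

import httpx

async def main() -> None:
    async with httpx.AsyncClient(base_url="http://127.0.0.1:8000") as client:
        # The first call may be served live if the background refresh
        # has not populated the cache yet.
        response = await client.get("/notices")
        response.raise_for_status()
        for notice in response.json()[:5]:
            print(notice["name"], "->", notice["link"])

asyncio.run(main())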