slimshadow committed on
Commit
0c85842
·
verified ·
1 Parent(s): ca6f80b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -32
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- import requests
4
  from bs4 import BeautifulSoup
5
 
6
  app = FastAPI()
@@ -14,44 +14,33 @@ url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
14
 
15
  @app.get("/notices", response_model=list[Notice])
16
  async def get_notices():
17
- # Send a GET request to fetch the raw HTML content
18
- response = requests.get(url)
19
-
20
- # Check if the request was successful (status code 200)
 
21
  if response.status_code != 200:
22
  return {"error": f"Failed to retrieve content. Status code: {response.status_code}"}
23
 
24
- # Parse the HTML content using BeautifulSoup
25
- soup = BeautifulSoup(response.text, 'html.parser')
26
 
27
  # Find all rows or columns containing notice information
28
- rows = soup.find_all('td')
29
 
30
  # Initialize a list to hold notice name and links
31
  notice_list = []
32
- seen_links = set() # Set to track links we've already encountered
33
 
34
- # Iterate through all <td> tags and extract notice name and link
35
- for row in rows:
36
- # Find the anchor tag within the <td>
37
- link_tag = row.find('a')
38
-
39
- # Check if there's a valid link
40
- if link_tag:
41
- # Extract the name (text) and link (href)
42
- notice_name = link_tag.text.strip()
43
- notice_link = link_tag.get('href')
44
-
45
- # Handle cases where the name is missing or the text is just a link
46
- if not notice_name:
47
- notice_name = f"Notice {len(notice_list) + 1}" # Fallback if there's no text in the <a> tag
48
-
49
- # Check if the link is already in the set of seen links (i.e., duplicate)
50
- if notice_link not in seen_links:
51
- # If not a duplicate, add the link to the seen set and add notice to the list
52
- seen_links.add(notice_link)
53
- notice_list.append({'name': notice_name, 'link': notice_link})
54
-
55
- # Return the list of notices as JSON
56
- return notice_list[:50] # Return top 30 notices
57
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
+ import httpx # Asynchronous HTTP client
4
  from bs4 import BeautifulSoup
5
 
6
  app = FastAPI()
 
14
 
15
@app.get("/notices", response_model=list[Notice])
async def get_notices():
    """Scrape the university notice board and return up to 50 unique notices.

    Returns:
        A list of ``{'name': str, 'link': str}`` dicts, de-duplicated by link,
        validated against the ``Notice`` response model.

    Raises:
        HTTPException(502): when the upstream page cannot be fetched or does
        not answer with HTTP 200.
    """
    # Local import keeps the module-level import block untouched.
    from fastapi import HTTPException

    async with httpx.AsyncClient() as client:
        try:
            # Asynchronous GET so the event loop is not blocked while waiting.
            response = await client.get(url)
        except httpx.HTTPError as exc:
            # Network-level failure (timeout, DNS, connection refused, ...).
            # Without this, httpx exceptions bubble up as an unhandled 500.
            raise HTTPException(
                status_code=502,
                detail=f"Failed to retrieve content: {exc}",
            ) from exc

    if response.status_code != 200:
        # BUG FIX: the original returned {"error": ...} here, but the route
        # declares response_model=list[Notice]; that dict fails response
        # validation and surfaces as a 500. Raise a proper HTTP error instead.
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content. Status code: {response.status_code}",
        )

    # 'lxml' is a fast C-based parser; it must be installed for BeautifulSoup
    # to use it (it already is a dependency of this app, per the commit).
    soup = BeautifulSoup(response.text, 'lxml')

    # Anchor tags inside table cells carry the notice title and its href.
    notice_list = []
    seen_links = set()  # track hrefs already emitted, to de-duplicate
    for link_tag in soup.select('td a'):
        notice_link = link_tag.get('href', '').strip()
        if not notice_link or notice_link in seen_links:
            continue
        seen_links.add(notice_link)
        # Fallback title when the anchor has no visible text.
        notice_name = link_tag.text.strip() or f"Notice {len(notice_list) + 1}"
        notice_list.append({'name': notice_name, 'link': notice_link})

    # Cap the payload at the first 50 entries (page order).
    return notice_list[:50]