slimshadow committed on
Commit
2bc4c3f
·
verified ·
1 Parent(s): 264a9db

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +57 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Docker Spaces reference: https://huggingface.co/docs/hub/spaces-sdks-docker
# (includes guides on structuring this Dockerfile)

FROM python:3.9

# Run as an unprivileged user, as recommended for Hugging Face Spaces.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install requirements first so the dependency layer is cached
# across source-code-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Bring in the application source and serve it on the Spaces port (7860).
COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
5
+
6
# FastAPI application instance; uvicorn serves it as "app:app".
app = FastAPI()
7
+
8
class Notice(BaseModel):
    """One scraped notice: its human-readable title and its URL."""

    name: str
    link: str
11
+
12
# Listing page on the university site that the /notices endpoint scrapes.
url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
14
+
15
@app.get("/notices", response_model=list[Notice])
async def get_notices():
    """Scrape the university notice board and return up to 30 unique notices.

    Returns:
        A list of dicts with ``name`` and ``link`` keys (serialized via the
        ``Notice`` response model), newest-first as they appear on the page.

    Raises:
        HTTPException: 502 when the upstream page cannot be fetched or
            responds with a non-200 status.
    """
    # Fetch the raw HTML. A timeout is essential: without one a stalled
    # upstream server would hang this worker indefinitely.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content: {exc}",
        )

    # NOTE: returning an error dict here would violate
    # response_model=list[Notice]; raise an HTTP error instead.
    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content. Status code: {response.status_code}",
        )

    # Parse the HTML content using BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html.parser')

    notice_list = []
    seen_links = set()  # tracks hrefs already emitted, to drop duplicates

    # Each notice is an <a> inside a table cell; scan every <td>.
    for cell in soup.find_all('td'):
        link_tag = cell.find('a')
        if not link_tag:
            continue

        href = link_tag.get('href')
        if not href:
            # An anchor with no href would yield link=None, which the
            # Notice model (link: str) cannot serialize — skip it.
            continue

        notice_name = link_tag.text.strip()
        # Resolve relative hrefs against the listing-page URL so clients
        # always receive an absolute link.
        notice_link = urljoin(url, href)

        # Fallback title when the anchor carries no visible text.
        if not notice_name:
            notice_name = f"Notice {len(notice_list) + 1}"

        if notice_link not in seen_links:
            seen_links.add(notice_link)
            notice_list.append({'name': notice_name, 'link': notice_link})

    # Cap the response at the 30 most recent notices.
    return notice_list[:30]
57
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
fastapi
uvicorn
requests
beautifulsoup4