slimshadow committed on
Commit
2bc4c3f
·
verified ·
1 Parent(s): 264a9db

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. app.py +57 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Docker Spaces reference: https://huggingface.co/docs/hub/spaces-sdks-docker
# (includes guides on structuring this Dockerfile)

FROM python:3.9

# Run as an unprivileged user, as recommended for Hugging Face Spaces.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install requirements first so the dependency layer is cached
# across source-code-only changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Bring in the application source and serve it on the Spaces port (7860).
COPY --chown=user . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
5
+
6
# FastAPI application instance; uvicorn serves it as "app:app".
app = FastAPI()
7
+
8
class Notice(BaseModel):
    """One scraped notice: its human-readable title and its URL."""

    name: str
    link: str
11
+
12
# Listing page on the university site that the /notices endpoint scrapes.
url = 'https://www.durguniversity.ac.in/index.php/Home/Noticelist'
14
+
15
@app.get("/notices", response_model=list[Notice])
async def get_notices():
    """Scrape the university notice board and return up to 30 unique notices.

    Returns:
        A list of dicts with ``name`` and ``link`` keys (serialized via the
        ``Notice`` response model), newest-first as they appear on the page.

    Raises:
        HTTPException: 502 when the upstream page cannot be fetched or
            responds with a non-200 status.
    """
    # Fetch the raw HTML. A timeout is essential: without one a stalled
    # upstream server would hang this worker indefinitely.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content: {exc}",
        )

    # NOTE: returning an error dict here would violate
    # response_model=list[Notice]; raise an HTTP error instead.
    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve content. Status code: {response.status_code}",
        )

    # Parse the HTML content using BeautifulSoup.
    soup = BeautifulSoup(response.text, 'html.parser')

    notice_list = []
    seen_links = set()  # tracks hrefs already emitted, to drop duplicates

    # Each notice is an <a> inside a table cell; scan every <td>.
    for cell in soup.find_all('td'):
        link_tag = cell.find('a')
        if not link_tag:
            continue

        href = link_tag.get('href')
        if not href:
            # An anchor with no href would yield link=None, which the
            # Notice model (link: str) cannot serialize — skip it.
            continue

        notice_name = link_tag.text.strip()
        # Resolve relative hrefs against the listing-page URL so clients
        # always receive an absolute link.
        notice_link = urljoin(url, href)

        # Fallback title when the anchor carries no visible text.
        if not notice_name:
            notice_name = f"Notice {len(notice_list) + 1}"

        if notice_link not in seen_links:
            seen_links.add(notice_link)
            notice_list.append({'name': notice_name, 'link': notice_link})

    # Cap the response at the 30 most recent notices.
    return notice_list[:30]
57
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
fastapi
uvicorn
requests
beautifulsoup4