Shami96 committed on
Commit
b1e29d6
·
verified ·
1 Parent(s): 9ae7124

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +34 -8
utils.py CHANGED
@@ -1,10 +1,36 @@
1
  # utils.py
2
 
3
- import pdfplumber
4
-
5
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; it is passed through
            unchanged.

    Returns:
        The text of all pages joined with no separator, with leading and
        trailing whitespace stripped. Pages for which ``extract_text()``
        returns a falsy value contribute an empty string.
    """
    with pdfplumber.open(file) as document:
        # ``or ""`` covers pages where extract_text() yields nothing usable.
        page_texts = [page.extract_text() or "" for page in document.pages]
    return "".join(page_texts).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # utils.py
2
 
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+
6
def fetch_active_tenders(*, timeout=30):
    """Scrape the PPRA tenders listing page and return its rows as dicts.

    Downloads ``https://www.ppra.org.pk/dad_tenders.asp``, takes the first
    ``<table>`` in the document, skips its first row (treated as the header)
    and converts every remaining row that has at least five ``<td>`` cells.

    Args:
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        A list of dicts with the keys ``tender_no``, ``organization``,
        ``title``, ``advertised_date`` and ``closing_date``. Each value is
        the whitespace-stripped text of the corresponding cell. The list is
        empty when the page contains no table or no qualifying rows.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    url = "https://www.ppra.org.pk/dad_tenders.asp"
    # Without an explicit timeout, requests can block forever on a stalled
    # server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    table = soup.find("table")  # Adjust selector based on actual HTML structure
    if table is None:
        return []

    # Column order of the listing table; cells beyond these are ignored.
    field_names = (
        "tender_no",
        "organization",
        "title",
        "advertised_date",
        "closing_date",
    )

    tenders = []
    for row in table.find_all("tr")[1:]:  # Skip header row
        cells = row.find_all("td")
        if len(cells) < len(field_names):
            # Spacer or malformed rows do not describe a tender.
            continue
        tenders.append(
            {
                name: cell.get_text(strip=True)
                for name, cell in zip(field_names, cells)
            }
        )
    return tenders
27
+
28
def fetch_tender_details(tender_no, *, timeout=30):
    """Download a tender's detail page and return its visible text.

    Args:
        tender_no: Tender identifier, interpolated into the detail-page URL.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    Returns:
        All text of the page as a single string, with each text fragment
        stripped and the fragments joined by newlines.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # Construct the URL to the tender's detail page
    detail_url = f"https://www.ppra.org.pk/tender/{tender_no}.asp"  # Placeholder URL
    # Without an explicit timeout, requests can block forever on a stalled
    # server.
    response = requests.get(detail_url, timeout=timeout)
    # An error page (e.g. 404) must not be returned as if it were tender
    # details.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract detailed information
    return soup.get_text(separator="\n", strip=True)