Spaces:
Sleeping
Sleeping
Update scraper.py
Browse files- scraper.py +19 -8
scraper.py
CHANGED
|
@@ -8,17 +8,29 @@ def fetch_tariff_data(url):
|
|
| 8 |
response.raise_for_status()
|
| 9 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
tariff_data = []
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
for row in rows:
|
| 16 |
cols = row.find_all('td')
|
| 17 |
if len(cols) >= 2:
|
| 18 |
-
|
| 19 |
-
upper_limit = parse_float(cols[0].text.strip().split()[2] if 'to' in cols[0].text else 'inf')
|
| 20 |
rate = parse_float(cols[-1].text.strip())
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
tariff_data.append({
|
| 23 |
'lower_limit': lower_limit,
|
| 24 |
'upper_limit': upper_limit,
|
|
@@ -32,7 +44,6 @@ def fetch_tariff_data(url):
|
|
| 32 |
|
| 33 |
def parse_float(value):
|
| 34 |
try:
|
| 35 |
-
return float(value.replace(',', ''))
|
| 36 |
except ValueError:
|
| 37 |
return float('inf')
|
| 38 |
-
|
|
|
|
| 8 |
response.raise_for_status()
|
| 9 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 10 |
|
| 11 |
+
# Locate the table with tariff data
|
| 12 |
+
table = soup.find('table')
|
| 13 |
+
if not table:
|
| 14 |
+
print("Error: No table found on the page.")
|
| 15 |
+
return None
|
| 16 |
+
|
| 17 |
tariff_data = []
|
| 18 |
+
rows = table.find_all('tr')[1:] # Skip the header row
|
| 19 |
+
|
|
|
|
| 20 |
for row in rows:
|
| 21 |
cols = row.find_all('td')
|
| 22 |
if len(cols) >= 2:
|
| 23 |
+
particulars = cols[0].text.strip()
|
|
|
|
| 24 |
rate = parse_float(cols[-1].text.strip())
|
| 25 |
+
|
| 26 |
+
if "Units" in particulars:
|
| 27 |
+
limits = [parse_float(x) for x in particulars.split(' ')[1:]]
|
| 28 |
+
lower_limit = limits[0]
|
| 29 |
+
upper_limit = limits[1] if len(limits) > 1 else float('inf')
|
| 30 |
+
else:
|
| 31 |
+
lower_limit = 0
|
| 32 |
+
upper_limit = float('inf')
|
| 33 |
+
|
| 34 |
tariff_data.append({
|
| 35 |
'lower_limit': lower_limit,
|
| 36 |
'upper_limit': upper_limit,
|
|
|
|
| 44 |
|
| 45 |
def parse_float(value):
    """Convert scraped table-cell text to a float.

    Thousands separators (",") and the "PKR" currency marker are removed and
    surrounding whitespace is stripped before the conversion.

    Args:
        value: Cell text such as "1,234.5 PKR". Values that are already
            numeric (int/float) are accepted and converted directly.

    Returns:
        The parsed number as a float, or ``float('inf')`` when the value
        cannot be interpreted as a number (empty text, words, ``None``).
    """
    # Already-numeric input has no text to clean up.
    if isinstance(value, (int, float)):
        return float(value)
    try:
        return float(value.replace(',', '').replace('PKR', '').strip())
    except (AttributeError, TypeError, ValueError):
        # Best-effort parsing: keep the existing infinity fallback instead of
        # raising, including for non-string input that has no usable .replace().
        return float('inf')
|
|
|