CoderHassan committed on
Commit
9d37e34
·
verified ·
1 Parent(s): 71520e8

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +19 -8
scraper.py CHANGED
@@ -8,17 +8,29 @@ def fetch_tariff_data(url):
8
  response.raise_for_status()
9
  soup = BeautifulSoup(response.content, 'html.parser')
10
 
 
 
 
 
 
 
11
  tariff_data = []
12
- table = soup.find('table') # Assuming the first table on the page is the tariff table
13
- rows = table.find_all('tr')[1:] # Skipping the header row
14
-
15
  for row in rows:
16
  cols = row.find_all('td')
17
  if len(cols) >= 2:
18
- lower_limit = parse_float(cols[0].text.strip().split()[0])
19
- upper_limit = parse_float(cols[0].text.strip().split()[2] if 'to' in cols[0].text else 'inf')
20
  rate = parse_float(cols[-1].text.strip())
21
-
 
 
 
 
 
 
 
 
22
  tariff_data.append({
23
  'lower_limit': lower_limit,
24
  'upper_limit': upper_limit,
@@ -32,7 +44,6 @@ def fetch_tariff_data(url):
32
 
33
  def parse_float(value):
34
  try:
35
- return float(value.replace(',', ''))
36
  except ValueError:
37
  return float('inf')
38
-
 
8
  response.raise_for_status()
9
  soup = BeautifulSoup(response.content, 'html.parser')
10
 
11
+ # Locate the table with tariff data
12
+ table = soup.find('table')
13
+ if not table:
14
+ print("Error: No table found on the page.")
15
+ return None
16
+
17
  tariff_data = []
18
+ rows = table.find_all('tr')[1:] # Skip the header row
19
+
 
20
  for row in rows:
21
  cols = row.find_all('td')
22
  if len(cols) >= 2:
23
+ particulars = cols[0].text.strip()
 
24
  rate = parse_float(cols[-1].text.strip())
25
+
26
+ if "Units" in particulars:
27
+ limits = [parse_float(x) for x in particulars.split(' ')[1:]]
28
+ lower_limit = limits[0]
29
+ upper_limit = limits[1] if len(limits) > 1 else float('inf')
30
+ else:
31
+ lower_limit = 0
32
+ upper_limit = float('inf')
33
+
34
  tariff_data.append({
35
  'lower_limit': lower_limit,
36
  'upper_limit': upper_limit,
 
44
 
45
  def parse_float(value):
46
  try:
47
+ return float(value.replace(',', '').replace('PKR', '').strip())
48
  except ValueError:
49
  return float('inf')