Engineer786 committed on
Commit
096c143
·
verified ·
1 Parent(s): bdc1d95

Update tariff_scraper.py

Browse files
Files changed (1) hide show
  1. tariff_scraper.py +8 -3
tariff_scraper.py CHANGED
@@ -7,7 +7,7 @@ TARIFF_URLS = {
7
  "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
8
  }
9
 
10
- def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
11
  """
12
  Scrape tariff data from the given URL and save it to a CSV file.
13
 
@@ -31,15 +31,20 @@ def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
31
  if not tariff_table:
32
  return "No table found on the webpage."
33
 
34
- # Extract data and convert it into a structured format
35
  data = []
 
36
  table_rows = tariff_table.find_all('tr')
37
  for row in table_rows:
38
  cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
 
39
  data.append(cols)
40
 
 
 
 
41
  # Save the data to a CSV file
42
- df = pd.DataFrame(data[1:], columns=data[0]) # Use the first row as headers
43
  df.to_csv(output_file, index=False)
44
 
45
  return output_file
 
7
  "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
8
  }
9
 
10
+ def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv"):
11
  """
12
  Scrape tariff data from the given URL and save it to a CSV file.
13
 
 
31
  if not tariff_table:
32
  return "No table found on the webpage."
33
 
34
+ # Extract rows and normalize column counts
35
  data = []
36
+ max_columns = 0
37
  table_rows = tariff_table.find_all('tr')
38
  for row in table_rows:
39
  cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
40
+ max_columns = max(max_columns, len(cols))
41
  data.append(cols)
42
 
43
+ # Normalize rows to have the same number of columns
44
+ normalized_data = [row + [''] * (max_columns - len(row)) for row in data]
45
+
46
  # Save the data to a CSV file
47
+ df = pd.DataFrame(normalized_data[1:], columns=normalized_data[0]) # Use the first row as headers
48
  df.to_csv(output_file, index=False)
49
 
50
  return output_file