Engineer786 committed on
Commit
2b26716
·
verified ·
1 Parent(s): 9c6647f

Update tariff_scraper.py

Browse files
Files changed (1) hide show
  1. tariff_scraper.py +19 -38
tariff_scraper.py CHANGED
@@ -4,12 +4,12 @@ import pandas as pd
4
 
5
  # Define the URL for PESCO tariff rates
6
  TARIFF_URLS = {
7
- "PESCO": "https://onlinepescobill.pk/pesco-tariff-rates/"
8
  }
9
 
10
- def scrape_multiple_sections_to_csv(url, output_file="pesco_tariff_data.csv"):
11
  """
12
- Scrape tariff data from multiple sections on the given URL and save it to a CSV file.
13
 
14
  Args:
15
  url (str): The URL of the tariff page to scrape.
@@ -26,40 +26,21 @@ def scrape_multiple_sections_to_csv(url, output_file="pesco_tariff_data.csv"):
26
  # Parse the webpage content using BeautifulSoup
27
  soup = BeautifulSoup(response.text, 'html.parser')
28
 
29
- # Find all tables on the page
30
- tables = soup.find_all('table')
31
- if not tables:
32
- return "No tables found on the webpage."
33
 
34
- # Initialize a list to hold all dataframes
35
- all_data = []
 
 
 
 
36
 
37
- for i, table in enumerate(tables, start=1):
38
- # Extract table rows
39
- data = []
40
- table_rows = table.find_all('tr')
41
- for row in table_rows:
42
- cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
43
- data.append(cols)
44
-
45
- # Create a dataframe for the current table
46
- if len(data) > 1:
47
- df = pd.DataFrame(data[1:], columns=data[0])
48
- else:
49
- df = pd.DataFrame(data)
50
-
51
- # Deduplicate column names if necessary
52
- df.columns = pd.io.parsers._deduplicate(df.columns)
53
-
54
- # Add a section identifier
55
- df["Section"] = f"Section {i}"
56
- all_data.append(df)
57
-
58
- # Combine all dataframes into one
59
- combined_data = pd.concat(all_data, ignore_index=True)
60
-
61
- # Save the combined data to a CSV file
62
- combined_data.to_csv(output_file, index=False)
63
 
64
  return output_file
65
  except requests.exceptions.RequestException as e:
@@ -71,9 +52,9 @@ def scrape_multiple_sections_to_csv(url, output_file="pesco_tariff_data.csv"):
71
 
72
  if __name__ == "__main__":
73
  # Test the scraper and save data to a CSV file
74
- url = TARIFF_URLS["PESCO"]
75
- output_file = "pesco_tariff_data.csv"
76
- result = scrape_multiple_sections_to_csv(url, output_file)
77
  if result.endswith(".csv"):
78
  print(f"Data successfully saved to {output_file}")
79
  else:
 
4
 
5
  # Define the URL for PESCO tariff rates
6
  TARIFF_URLS = {
7
+ "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
8
  }
9
 
10
+ def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
11
  """
12
+ Scrape tariff data from the given URL and save it to a CSV file.
13
 
14
  Args:
15
  url (str): The URL of the tariff page to scrape.
 
26
  # Parse the webpage content using BeautifulSoup
27
  soup = BeautifulSoup(response.text, 'html.parser')
28
 
29
+ # Extract table rows
30
+ tariff_table = soup.find('table')
31
+ if not tariff_table:
32
+ return "No table found on the webpage."
33
 
34
+ # Extract data and convert it into a structured format
35
+ data = []
36
+ table_rows = tariff_table.find_all('tr')
37
+ for row in table_rows:
38
+ cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
39
+ data.append(cols)
40
 
41
+ # Save the data to a CSV file
42
+ df = pd.DataFrame(data[1:], columns=data[0]) # Use the first row as headers
43
+ df.to_csv(output_file, index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  return output_file
46
  except requests.exceptions.RequestException as e:
 
52
 
53
  if __name__ == "__main__":
54
  # Test the scraper and save data to a CSV file
55
+ url = TARIFF_URLS["IESCO"]
56
+ output_file = "iesco_tariff_data.csv"
57
+ result = scrape_tariff_data_to_csv(url, output_file)
58
  if result.endswith(".csv"):
59
  print(f"Data successfully saved to {output_file}")
60
  else: