Engineer786 committed on
Commit
807f1f4
·
verified ·
1 Parent(s): 0af0974

Delete tariff_scraper.py

Browse files
Files changed (1) hide show
  1. tariff_scraper.py +0 -66
tariff_scraper.py DELETED
@@ -1,66 +0,0 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
- import pandas as pd
4
-
5
- # Define the URL for PESCO tariff rates
6
- TARIFF_URLS = {
7
- "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
8
- }
9
-
10
- def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv"):
11
- """
12
- Scrape tariff data from the given URL and save it to a CSV file.
13
-
14
- Args:
15
- url (str): The URL of the tariff page to scrape.
16
- output_file (str): The name of the CSV file to save data.
17
-
18
- Returns:
19
- str: The name of the CSV file if successful, or an error message.
20
- """
21
- try:
22
- # Send an HTTP GET request to the specified URL
23
- response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
24
- response.raise_for_status() # Raise an error for HTTP issues
25
-
26
- # Parse the webpage content using BeautifulSoup
27
- soup = BeautifulSoup(response.text, 'html.parser')
28
-
29
- # Extract table rows
30
- tariff_table = soup.find('table')
31
- if not tariff_table:
32
- return "No table found on the webpage."
33
-
34
- # Extract rows and normalize column counts
35
- data = []
36
- max_columns = 0
37
- table_rows = tariff_table.find_all('tr')
38
- for row in table_rows:
39
- cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
40
- max_columns = max(max_columns, len(cols))
41
- data.append(cols)
42
-
43
- # Normalize rows to have the same number of columns
44
- normalized_data = [row + [''] * (max_columns - len(row)) for row in data]
45
-
46
- # Save the data to a CSV file
47
- df = pd.DataFrame(normalized_data[1:], columns=normalized_data[0]) # Use the first row as headers
48
- df.to_csv(output_file, index=False)
49
-
50
- return output_file
51
- except requests.exceptions.RequestException as e:
52
- # Handle request errors (e.g., connection issues, timeout)
53
- return f"Request error: {e}"
54
- except Exception as e:
55
- # Handle other potential errors
56
- return f"An unexpected error occurred: {e}"
57
-
58
- if __name__ == "__main__":
59
- # Test the scraper and save data to a CSV file
60
- url = TARIFF_URLS["IESCO"]
61
- output_file = "iesco_tariff_data.csv"
62
- result = scrape_tariff_data_to_csv(url, output_file)
63
- if result.endswith(".csv"):
64
- print(f"Data successfully saved to {output_file}")
65
- else:
66
- print(f"Error: {result}")