Demoforhackathon / tariff_scraper.py
Engineer786's picture
Update tariff_scraper.py
096c143 verified
raw
history blame
2.38 kB
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Tariff-guide page URLs keyed by provider name (currently only IESCO).
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
}
def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv", timeout=30):
    """
    Scrape the first HTML table found at ``url`` and save it to a CSV file.

    The first table row is used as the CSV header; every following row
    becomes a data row. Rows with fewer cells than the widest row are padded
    with empty strings so that all rows have the same number of columns.

    Failures are reported through the return value rather than raised, so
    callers must compare the result against ``output_file`` to detect
    success.

    Args:
        url (str): The URL of the tariff page to scrape.
        output_file (str): The name of the CSV file to save data.
        timeout (float | tuple): Seconds to wait for the server, passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.

    Returns:
        str: ``output_file`` if successful, or an error message.
    """
    try:
        # Fetch the page, sending a browser-like User-Agent header.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

        soup = BeautifulSoup(response.text, 'html.parser')

        # Only the first <table> on the page is considered.
        tariff_table = soup.find('table')
        if tariff_table is None:
            return "No table found on the webpage."

        # One list of stripped cell texts per <tr>, header and data cells alike.
        data = [
            [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
            for row in tariff_table.find_all('tr')
        ]

        # No rows, or only rows without cells: there is no header to build from.
        if not any(data):
            return "No data rows found in the table."

        # Pad short rows so every row is as wide as the widest one.
        max_columns = max(len(cells) for cells in data)
        normalized_data = [
            cells + [''] * (max_columns - len(cells)) for cells in data
        ]

        # First row supplies the column names; the rest is the table body.
        header, *body = normalized_data
        df = pd.DataFrame(body, columns=header)
        df.to_csv(output_file, index=False)
        return output_file
    except requests.exceptions.RequestException as e:
        # Connection problems, timeouts and HTTP error statuses
        return f"Request error: {e}"
    except Exception as e:
        # Anything else (parsing, DataFrame construction, file writing)
        return f"An unexpected error occurred: {e}"
if __name__ == "__main__":
    # Test the scraper and save data to a CSV file
    url = TARIFF_URLS["IESCO"]
    output_file = "iesco_tariff_data.csv"
    result = scrape_tariff_data_to_csv(url, output_file)

    # The scraper returns the output file name on success and an error
    # message otherwise. Compare against the file name instead of checking
    # for a ".csv" suffix, which an error message could also end with.
    if result == output_file:
        print(f"Data successfully saved to {output_file}")
    else:
        print(f"Error: {result}")