# Hackathon / tariff_scraper.py
# Engineer786's picture
# Update tariff_scraper.py
# 6ef2bb8 verified
import requests
from bs4 import BeautifulSoup
# Tariff page for each electricity distribution company, keyed by the
# company's short name. Insertion order is the order shown in the menu.
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff",
}
def scrape_tariff_data(url, timeout=10):
    """
    Scrape tariff data from the given URL.

    Every ``<tr>`` located inside a ``<table>`` on the page is flattened into
    one string whose cell texts are joined with ``' | '``. Rows that yield an
    empty string are skipped.

    Args:
        url (str): The URL of the tariff page to scrape.
        timeout (float or tuple): Seconds to wait for the server before
            giving up; passed straight to ``requests.get``. Defaults to 10.

    Returns:
        list: A list of strings representing the rows of tariff data.
        Request and parsing errors are not raised; they are reported as a
        single-element list holding a human-readable message, as are the
        "no tables" and "no data" cases.
    """
    try:
        # A timeout is required: without one, requests waits indefinitely
        # on a server that accepts the connection but never answers.
        # NOTE(review): the browser-like User-Agent is presumably there
        # because some sites reject the default client UA -- confirm.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout
        )
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        # Parse the webpage content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # find_all returns nested tables as well as top-level ones
        tariff_sections = soup.find_all('table')
        if not tariff_sections:
            return ["No tables found on the webpage."]

        data = []
        # A row of a nested table is a descendant of both the outer and the
        # inner <table>, so it would otherwise be emitted twice. Remember
        # which row nodes were already handled (the soup keeps them alive,
        # so their id() stays unique for the duration of the loop).
        seen_rows = set()
        for section in tariff_sections:
            for row in section.find_all('tr'):
                if id(row) in seen_rows:
                    continue
                seen_rows.add(id(row))
                # Extract text from each <td> or <th> within the row
                row_text = ' | '.join(
                    col.get_text(strip=True)
                    for col in row.find_all(['th', 'td'])
                )
                if row_text:  # Add only rows that have meaningful data
                    data.append(row_text)
        return data if data else ["No data found in the tables."]
    except requests.exceptions.RequestException as e:
        # Handle request errors (e.g., connection issues, timeout, HTTP status)
        return [f"Request error: {e}"]
    except Exception as e:
        # Best-effort boundary: callers expect a list of strings, so any
        # other failure is reported in-band rather than raised.
        return [f"An unexpected error occurred: {e}"]
if __name__ == "__main__":
    # Interactive entry point: list the known companies, let the user pick
    # one by number, then print a preview of the scraped tariff rows.
    companies = list(TARIFF_URLS)
    print("Available Companies:")
    for idx, company in enumerate(companies, start=1):
        print(f"{idx}. {company}")

    try:
        # User selects a company
        selection = int(input("Enter the number corresponding to the company: "))
        # Reject 0 and negative numbers explicitly: after the "- 1" shift
        # they become negative indexes, which Python would resolve from the
        # end of the list instead of raising IndexError.
        if not 1 <= selection <= len(companies):
            raise IndexError(selection)
        selected_company = companies[selection - 1]
    except (ValueError, IndexError):
        print("Invalid selection. Please choose a valid company number.")
    else:
        url = TARIFF_URLS[selected_company]
        print(f"\nFetching tariff data for {selected_company} ({url})...\n")

        # Scrape and display the data
        tariff_data = scrape_tariff_data(url)
        print("Tariff Data:")
        for row in tariff_data[:10]:  # Show a preview of the first 10 rows
            print(row)