Spaces:

Engineer786
/

Hackathon

Build error

App Files Files Community

Hackathon / tariff_scraper.py

Engineer786

Update tariff_scraper.py

6ef2bb8 verified about 1 year ago

raw

history blame contribute delete

3.04 kB

	import requests
	from bs4 import BeautifulSoup

	# Tariff page URL for each electricity company, keyed by the company's
	# short name. Insertion order is significant: the interactive menu in the
	# __main__ section numbers the companies in exactly this order.
	TARIFF_URLS = {
	    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
	    "FESCO": "https://fesco.com.pk/tariff",
	    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
	    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
	    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
	    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
	    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
	    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff",
	}

	def scrape_tariff_data(url, timeout=10):
	    """Scrape tariff table rows from the given URL.

	    Every ``<tr>`` of every ``<table>`` found on the page is flattened into
	    a single string in which the text of its ``<th>``/``<td>`` cells is
	    joined with ``" | "``.

	    Args:
	        url (str): The URL of the tariff page to scrape.
	        timeout (float): Seconds to wait for the server before giving up.
	            Defaults to 10.

	    Returns:
	        list: A list of strings, one per table row that has at least one
	        non-empty cell. Errors are never raised to the caller; instead a
	        one-element list holding a human-readable message is returned
	        (no tables, no data, request error, or unexpected error).
	    """
	    try:
	        # Always pass a timeout: without one, requests waits indefinitely
	        # on a server that accepts the connection but never answers.
	        response = requests.get(
	            url,
	            headers={"User-Agent": "Mozilla/5.0"},
	            timeout=timeout,
	        )
	        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

	        # Parse the webpage content using BeautifulSoup
	        soup = BeautifulSoup(response.text, 'html.parser')

	        tables = soup.find_all('table')
	        if not tables:
	            return ["No tables found on the webpage."]

	        data = []
	        for table in tables:
	            for row in table.find_all('tr'):
	                # Extract text from each <th> or <td> within the row
	                cells = [
	                    cell.get_text(strip=True)
	                    for cell in row.find_all(['th', 'td'])
	                ]
	                # Skip rows whose cells are all empty; joining them would
	                # otherwise yield a string made only of separators.
	                if any(cells):
	                    data.append(" | ".join(cells))

	        return data if data else ["No data found in the tables."]
	    except requests.exceptions.RequestException as e:
	        # Handle request errors (e.g., connection issues, timeout)
	        return [f"Request error: {e}"]
	    except Exception as e:
	        # Deliberately broad: callers expect a list of strings back even
	        # when parsing fails, so the error is reported as data.
	        return [f"An unexpected error occurred: {e}"]

	if __name__ == "__main__":
	    # Interactive entry point: list the known companies, let the user pick
	    # one by number, then print a preview of the scraped tariff rows.
	    companies = list(TARIFF_URLS)
	    print("Available Companies:")
	    for idx, company in enumerate(companies, start=1):
	        print(f"{idx}. {company}")

	    raw_choice = input("Enter the number corresponding to the company: ")
	    try:
	        selection = int(raw_choice)
	    except ValueError:
	        selection = None  # Not a number at all

	    # Range-check explicitly instead of relying on IndexError: with plain
	    # indexing, 0 or a negative number would be accepted through Python's
	    # negative indexing and silently select a company from the end.
	    if selection is None or not 1 <= selection <= len(companies):
	        print("Invalid selection. Please choose a valid company number.")
	    else:
	        selected_company = companies[selection - 1]
	        url = TARIFF_URLS[selected_company]
	        print(f"\nFetching tariff data for {selected_company} ({url})...\n")

	        # Scrape and display the data
	        tariff_data = scrape_tariff_data(url)
	        print("Tariff Data:")
	        for row in tariff_data[:10]:  # Show a preview of the first 10 rows
	            print(row)