Spaces:

Engineer786
/

Hackathon

Build error

App Files Files Community

Hackathon / tariff_scraper.py

Engineer786

Update tariff_scraper.py

85e160f verified over 1 year ago

raw

history blame

2.72 kB

	import streamlit as st
	import requests
	from bs4 import BeautifulSoup

	# Tariff page URL for each supported company, keyed by the company's short
	# name. Insertion order is the order in which the keys are listed below.
	TARIFF_URLS = {
	    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
	    "FESCO": "https://fesco.com.pk/tariff",
	    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
	    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
	    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
	    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
	    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
	    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff",
	}

	# Fetch a page and flatten every HTML table row on it into one text line
	def scrape_tariff_data(url, timeout=15):
	    """Download *url* and return the text of every table row found on it.

	    Args:
	        url: Address of the page to scrape.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        On success, a list of strings: one per ``<tr>`` (inside any
	        ``<table>``) that has at least one non-empty ``<th>``/``<td>`` cell,
	        with the stripped cell texts joined by ``" | "``. The list is empty
	        when the page has no such rows.
	        On failure, a single string of the form
	        ``"An error occurred: <details>"``. Callers tell the two cases apart
	        with ``isinstance(result, list)``.
	    """
	    try:
	        # requests has no default timeout; without one an unresponsive host
	        # would block this call (and the UI waiting on it) indefinitely.
	        response = requests.get(
	            url,
	            headers={"User-Agent": "Mozilla/5.0"},  # browser-like User-Agent
	            timeout=timeout,
	        )
	        response.raise_for_status()  # turn 4xx/5xx responses into exceptions
	        soup = BeautifulSoup(response.text, "html.parser")

	        data = []
	        for table in soup.find_all("table"):
	            for row in table.find_all("tr"):
	                cells = [
	                    cell.get_text(strip=True)
	                    for cell in row.find_all(["th", "td"])
	                ]
	                # Test the cells, not the joined string: joining two or more
	                # empty cells yields " | ", which is truthy but carries no data.
	                if any(cells):
	                    data.append(" | ".join(cells))
	        return data
	    except Exception as e:
	        # Deliberately broad: this function never raises and reports every
	        # failure to its caller as a message string instead.
	        return f"An error occurred: {e}"

	# Entry point of the Streamlit page
	def main():
	    """Render the scraper UI and, when "Scrape" is pressed, show the result.

	    Draws the title, a company dropdown built from the keys of TARIFF_URLS
	    and a "Scrape" button. On a button press it calls scrape_tariff_data()
	    for the selected company's URL and then shows one of: the first ten
	    scraped rows, a warning for an empty result, or the error message that
	    scrape_tariff_data() returned.
	    """
	    st.title("Electricity Tariff Scraper")
	    st.write("Select the company to fetch tariff rates:")

	    company = st.selectbox("Select Company", list(TARIFF_URLS))

	    # Nothing more to do until the user presses the button.
	    if not st.button("Scrape"):
	        return

	    if not company:
	        st.error("Please select a valid company.")
	        return

	    target_url = TARIFF_URLS[company]
	    st.write(f"Scraping data for: {company}")
	    with st.spinner("Scraping data..."):
	        result = scrape_tariff_data(target_url)

	    # scrape_tariff_data() returns a list on success; anything else is an
	    # error message to display as-is.
	    if not isinstance(result, list):
	        st.error(result)
	    elif result:
	        st.success("Data scraped successfully!")
	        st.write("Here is a preview of the data:")
	        preview = result[:10]  # first 10 rows only, for readability
	        for line in preview:
	            st.write(line)
	    else:
	        st.warning("No data found on the page.")

	# Run the app only when this file is executed as a script; importing the
	# module does not call main().
	if __name__ == "__main__":
	    main()