Hackathon / tariff_scraper.py
Engineer786's picture
Update tariff_scraper.py
85e160f verified
raw
history blame
2.72 kB
import streamlit as st
import requests
from bs4 import BeautifulSoup
# Maps each distribution company's short name to the URL of its tariff page.
# The keys populate the company drop-down; insertion order is display order.
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff",
}
def scrape_tariff_data(url, timeout=10):
    """Download *url* and return the text of every non-empty table row.

    Every ``<tr>`` found inside any ``<table>`` on the page becomes one
    string: the stripped text of its ``<th>``/``<td>`` cells joined with
    ``" | "``. Rows whose cells are all empty (or that have no cells) are
    skipped.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled server
            would block the caller indefinitely.

    Returns:
        A list of row strings on success (empty when the page has no table
        data), or a string of the form ``"An error occurred: ..."`` when
        the download or parsing fails. Callers tell the two apart by type.
    """
    try:
        # Browser-like User-Agent; presumably some of these sites reject
        # the default client string -- TODO confirm.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        soup = BeautifulSoup(response.text, 'html.parser')

        # Assume tariff data is in <table> tags
        data = []
        for table in soup.find_all('table'):
            for row in table.find_all('tr'):
                cells = [
                    cell.get_text(strip=True)
                    for cell in row.find_all(['th', 'td'])
                ]
                # Test the cells, not the joined string: joining two empty
                # cells yields " | ", which is truthy but carries no data.
                if any(cells):
                    data.append(' | '.join(cells))
        return data
    except Exception as e:
        # Deliberately broad: this function reports every failure as a
        # message string instead of raising, and the caller depends on that.
        return f"An error occurred: {e}"
def main():
    """Render the Streamlit page: pick a company, scrape it, show a preview.

    The page shows a drop-down of the companies in ``TARIFF_URLS`` and a
    "Scrape" button. When the button is pressed the selected company's
    page is scraped and up to ten rows are displayed; an empty result
    produces a warning and a failed scrape produces an error message.
    """
    st.title("Electricity Tariff Scraper")
    st.write("Select the company to fetch tariff rates:")

    # Drop-down listing every known company
    company = st.selectbox("Select Company", list(TARIFF_URLS))

    # Nothing more to do until the user presses the button
    if not st.button("Scrape"):
        return

    if not company:
        st.error("Please select a valid company.")
        return

    target_url = TARIFF_URLS[company]
    st.write(f"Scraping data for: **{company}**")
    with st.spinner("Scraping data..."):
        result = scrape_tariff_data(target_url)

    # The scraper signals failure by returning a message instead of a list
    if not isinstance(result, list):
        st.error(result)
    elif result:
        st.success("Data scraped successfully!")
        st.write("Here is a preview of the data:")
        preview = result[:10]  # First 10 rows only, for readability
        for line in preview:
            st.write(line)
    else:
        st.warning("No data found on the page.")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()