Spaces:

Engineer786
/

Hackathon

Build error

App Files Community

Engineer786 committed on Jan 5, 2025

Commit

85e160f

verified ·

1 Parent(s): 291ff42

Update tariff_scraper.py

Browse files

Files changed (1) hide show

tariff_scraper.py +56 -39

tariff_scraper.py CHANGED Viewed

@@ -1,52 +1,69 @@
 import requests
 from bs4 import BeautifulSoup
-import pandas as pd
-def fetch_tariff_data(url):
-    response = requests.get(url)
-    if response.status_code != 200:
-        raise Exception(f"Failed to fetch data from {url}, status code: {response.status_code}")
-    soup = BeautifulSoup(response.content, 'html.parser')
-    sections = soup.find_all('tr', id='table_heading')
-    tariff_data = {}
-    for section in sections:
-        heading = section.find('td').get_text(strip=True)
-        rows = section.find_next_siblings('tr')
         data = []
-        for row in rows:
-            columns = row.find_all('td')
-            if len(columns) >= 5:  # Ensure it has the expected number of columns
-                data.append({
-                    "Sr. No.": columns[0].get_text(strip=True),
-                    "Category": columns[1].get_text(strip=True),
-                    "Fixed Rs/Cons/M": columns[2].get_text(strip=True),
-                    "Fixed Rs/kW/M": columns[3].get_text(strip=True),
-                    "Variable Rs/kWh": columns[4].get_text(strip=True),
-                })
-        tariff_data[heading] = pd.DataFrame(data)
-    return tariff_data
-def save_tariff_data(tariff_data, file_path):
-    with pd.ExcelWriter(file_path) as writer:
-        for heading, df in tariff_data.items():
-            df.to_excel(writer, sheet_name=heading[:31], index=False)
-if __name__ == "__main__":
-    url = "https://iesco.com.pk/index.php/customer-services/tariff-guide"
-    try:
-        tariff_data = fetch_tariff_data(url)
-        save_tariff_data(tariff_data, "tariff_data.xlsx")
-        print("Tariff data successfully saved to tariff_data.xlsx")
     except Exception as e:
-        print(f"Error: {e}")

+import streamlit as st
 import requests
 from bs4 import BeautifulSoup
+# Dictionary of companies and their URLs
+TARIFF_URLS = {
+    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
+    "FESCO": "https://fesco.com.pk/tariff",
+    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
+    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
+    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
+    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
+    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
+    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff"
+}
+# Function to scrape tariff data from a given URL
+def scrape_tariff_data(url):
+    try:
+        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+        response.raise_for_status()  # Raise an error for bad responses
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # Extract specific elements based on the webpage structure
+        # Assume tariff data is in <table> tags
+        tariff_sections = soup.find_all('table')
         data = []
+        for section in tariff_sections:
+            table_rows = section.find_all('tr')
+            for row in table_rows:
+                row_text = ' | '.join(
+                    col.get_text(strip=True) for col in row.find_all(['th', 'td'])
+                )
+                if row_text:  # Add the row text only if it contains data
+                    data.append(row_text)
+        return data  # Returns a list of row strings
     except Exception as e:
+        return f"An error occurred: {e}"
+# Streamlit app main function
+def main():
+    st.title("Electricity Tariff Scraper")
+    st.write("Select the company to fetch tariff rates:")
+    # Dropdown menu for company selection
+    company = st.selectbox("Select Company", list(TARIFF_URLS.keys()))
+    if st.button("Scrape"):
+        if company:
+            url = TARIFF_URLS[company]  # Get the URL for the selected company
+            st.write(f"Scraping data for: **{company}**")
+            with st.spinner("Scraping data..."):
+                data = scrape_tariff_data(url)
+                if isinstance(data, list) and data:
+                    st.success("Data scraped successfully!")
+                    st.write("Here is a preview of the data:")
+                    for row in data[:10]:  # Show only the first 10 rows for readability
+                        st.write(row)
+                elif isinstance(data, list) and not data:
+                    st.warning("No data found on the page.")
+                else:
+                    st.error(data)
+        else:
+            st.error("Please select a valid company.")
+if __name__ == "__main__":
+    main()