Engineer786 committed on
Commit
85e160f
·
verified ·
1 Parent(s): 291ff42

Update tariff_scraper.py

Browse files
Files changed (1) hide show
  1. tariff_scraper.py +56 -39
tariff_scraper.py CHANGED
@@ -1,52 +1,69 @@
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
- import pandas as pd
4
 
5
def fetch_tariff_data(url):
    """Fetch the tariff page at *url* and parse it into per-section DataFrames.

    The page is expected to contain `<tr id="table_heading">` rows that act as
    section headers, each followed by sibling `<tr>` data rows.

    Args:
        url: Page to download.

    Returns:
        dict[str, pd.DataFrame]: section heading -> DataFrame of its rows.

    Raises:
        Exception: if the HTTP response status is not 200.
    """
    # Timeout prevents the script from hanging indefinitely on a dead host.
    response = requests.get(url, timeout=15)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from {url}, status code: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')
    sections = soup.find_all('tr', id='table_heading')

    tariff_data = {}
    for section in sections:
        heading = section.find('td').get_text(strip=True)

        data = []
        # find_next_siblings('tr') yields ALL rows after this heading, including
        # rows that belong to LATER sections — stop at the next heading row so
        # each section only collects its own data (bug fix).
        for row in section.find_next_siblings('tr'):
            if row.get('id') == 'table_heading':
                break
            columns = row.find_all('td')
            if len(columns) >= 5:  # Ensure it has the expected number of columns
                data.append({
                    "Sr. No.": columns[0].get_text(strip=True),
                    "Category": columns[1].get_text(strip=True),
                    "Fixed Rs/Cons/M": columns[2].get_text(strip=True),
                    "Fixed Rs/kW/M": columns[3].get_text(strip=True),
                    "Variable Rs/kWh": columns[4].get_text(strip=True),
                })

        tariff_data[heading] = pd.DataFrame(data)

    return tariff_data
33
-
34
def save_tariff_data(tariff_data, file_path):
    """Write each section's DataFrame to its own sheet of an Excel workbook.

    Args:
        tariff_data: mapping of section heading -> DataFrame (from
            fetch_tariff_data).
        file_path: destination .xlsx path.
    """
    # Excel forbids these characters in sheet names and caps names at 31 chars;
    # a raw heading[:31] could raise, and truncation can collide — sanitize and
    # deduplicate (bug fix).
    invalid_chars = set('[]:*?/\\')
    used_names = set()
    with pd.ExcelWriter(file_path) as writer:
        for index, (heading, df) in enumerate(tariff_data.items()):
            cleaned = ''.join(ch for ch in heading if ch not in invalid_chars).strip()
            name = cleaned[:31] or f"Sheet{index + 1}"
            base, suffix = name, 1
            while name in used_names:
                suffix += 1
                tag = f"_{suffix}"
                name = base[:31 - len(tag)] + tag
            used_names.add(name)
            df.to_excel(writer, sheet_name=name, index=False)
38
 
39
if __name__ == "__main__":
    # Scrape the IESCO tariff guide and dump every section to an Excel workbook.
    source_url = "https://iesco.com.pk/index.php/customer-services/tariff-guide"
    try:
        scraped = fetch_tariff_data(source_url)
        save_tariff_data(scraped, "tariff_data.xlsx")
        print("Tariff data successfully saved to tariff_data.xlsx")
    except Exception as e:
        print(f"Error: {e}")
49
 
 
 
 
 
50
 
 
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
 
 
 
1
+ import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
 
5
# Maps each supported company code to the public page its tariff data is
# scraped from. Keys double as the labels shown in the Streamlit dropdown.
# NOTE(review): a mix of http/https and third-party mirrors (e.g. the PESCO
# entry) — verify these URLs are still live before relying on them.
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff"
}
16
 
17
# Function to scrape tariff data from a given URL
def scrape_tariff_data(url):
    """Download *url* and flatten every HTML table into pipe-joined row strings.

    Args:
        url: Page to download.

    Returns:
        list[str]: one "cell | cell | ..." string per non-empty table row, or
        str: a human-readable error message if the request/parse fails —
            callers distinguish the two cases via isinstance checks.
    """
    try:
        # timeout keeps the Streamlit worker from hanging forever on a dead
        # host (bug fix); the User-Agent avoids trivial bot blocking.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=15
        )
        response.raise_for_status()  # Raise an error for bad responses
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract specific elements based on the webpage structure.
        # Assume tariff data is in <table> tags.
        tariff_sections = soup.find_all('table')

        data = []
        for section in tariff_sections:
            table_rows = section.find_all('tr')
            for row in table_rows:
                row_text = ' | '.join(
                    col.get_text(strip=True) for col in row.find_all(['th', 'td'])
                )
                if row_text:  # Add the row text only if it contains data
                    data.append(row_text)

        return data  # Returns a list of row strings
    except Exception as e:
        # Deliberate: return the message instead of raising so main() can show
        # it with st.error() rather than crashing the app.
        return f"An error occurred: {e}"
 
 
41
 
42
# Streamlit app main function
def main():
    """Render the UI: pick a company, scrape its tariff page, preview rows."""
    st.title("Electricity Tariff Scraper")
    st.write("Select the company to fetch tariff rates:")

    # Dropdown menu for company selection
    company = st.selectbox("Select Company", list(TARIFF_URLS.keys()))

    # Guard clauses replace the original nested if/else pyramid.
    if not st.button("Scrape"):
        return
    if not company:
        st.error("Please select a valid company.")
        return

    st.write(f"Scraping data for: **{company}**")
    with st.spinner("Scraping data..."):
        data = scrape_tariff_data(TARIFF_URLS[company])

    # scrape_tariff_data returns a list on success, an error string on failure.
    if not isinstance(data, list):
        st.error(data)
    elif not data:
        st.warning("No data found on the page.")
    else:
        st.success("Data scraped successfully!")
        st.write("Here is a preview of the data:")
        for row in data[:10]:  # Show only the first 10 rows for readability
            st.write(row)
67
 
68
# Entry point when executed directly (e.g. via `streamlit run tariff_scraper.py`).
if __name__ == "__main__":
    main()