Engineer786 committed on
Commit
a6f7192
·
verified ·
1 Parent(s): 85e160f

Update tariff_scraper.py

Browse files
Files changed (1) hide show
  1. tariff_scraper.py +7 -36
tariff_scraper.py CHANGED
@@ -1,8 +1,7 @@
1
- import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
 
5
- # Dictionary of companies and their URLs
6
  TARIFF_URLS = {
7
  "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
8
  "FESCO": "https://fesco.com.pk/tariff",
@@ -14,16 +13,17 @@ TARIFF_URLS = {
14
  "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff"
15
  }
16
 
17
- # Function to scrape tariff data from a given URL
18
  def scrape_tariff_data(url):
 
 
 
19
  try:
20
  response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
21
  response.raise_for_status() # Raise an error for bad responses
22
  soup = BeautifulSoup(response.text, 'html.parser')
23
 
24
  # Extract specific elements based on the webpage structure
25
- # Assume tariff data is in <table> tags
26
- tariff_sections = soup.find_all('table')
27
 
28
  data = []
29
  for section in tariff_sections:
@@ -35,35 +35,6 @@ def scrape_tariff_data(url):
35
  if row_text: # Add the row text only if it contains data
36
  data.append(row_text)
37
 
38
- return data # Returns a list of row strings
39
  except Exception as e:
40
- return f"An error occurred: {e}"
41
-
42
- # Streamlit app main function
43
- def main():
44
- st.title("Electricity Tariff Scraper")
45
- st.write("Select the company to fetch tariff rates:")
46
-
47
- # Dropdown menu for company selection
48
- company = st.selectbox("Select Company", list(TARIFF_URLS.keys()))
49
-
50
- if st.button("Scrape"):
51
- if company:
52
- url = TARIFF_URLS[company] # Get the URL for the selected company
53
- st.write(f"Scraping data for: **{company}**")
54
- with st.spinner("Scraping data..."):
55
- data = scrape_tariff_data(url)
56
- if isinstance(data, list) and data:
57
- st.success("Data scraped successfully!")
58
- st.write("Here is a preview of the data:")
59
- for row in data[:10]: # Show only the first 10 rows for readability
60
- st.write(row)
61
- elif isinstance(data, list) and not data:
62
- st.warning("No data found on the page.")
63
- else:
64
- st.error(data)
65
- else:
66
- st.error("Please select a valid company.")
67
-
68
- if __name__ == "__main__":
69
- main()
 
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
 
4
+ # Dictionary mapping companies to their URLs
5
  TARIFF_URLS = {
6
  "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
7
  "FESCO": "https://fesco.com.pk/tariff",
 
13
  "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff"
14
  }
15
 
 
16
  def scrape_tariff_data(url):
17
+ """
18
+ Scrape tariff data from the given URL.
19
+ """
20
  try:
21
  response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
22
  response.raise_for_status() # Raise an error for bad responses
23
  soup = BeautifulSoup(response.text, 'html.parser')
24
 
25
  # Extract specific elements based on the webpage structure
26
+ tariff_sections = soup.find_all('table') # Assume tariff data is in <table> tags
 
27
 
28
  data = []
29
  for section in tariff_sections:
 
35
  if row_text: # Add the row text only if it contains data
36
  data.append(row_text)
37
 
38
+ return data if data else ["No data found on the webpage."]
39
  except Exception as e:
40
+ return [f"An error occurred: {e}"]