# Hackathon / tariff_scraper.py
# Web-page header captured along with the file (not part of the program):
#   Engineer786's picture
#   Update tariff_scraper.py
#   683037f verified
#   raw
#   history blame
#   1.72 kB
import requests
from bs4 import BeautifulSoup
import pandas as pd
def fetch_tariff_data(url, timeout=30):
    """Download a tariff page and parse each tariff section into a DataFrame.

    A section starts at a ``<tr id="table_heading">`` row. The text of that
    row's first ``<td>`` (or of the whole row when it has no ``<td>``) is the
    section heading. The sibling rows that follow it, up to but not including
    the next heading row, are read as that section's data rows. Rows with
    fewer than five ``<td>`` cells are skipped; extra cells are ignored.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Defaults to 30 so an unresponsive server cannot hang the caller.

    Returns:
        A dict mapping each heading string to a ``pandas.DataFrame`` with the
        columns "Sr. No.", "Category", "Fixed Rs/Cons/M", "Fixed Rs/kW/M" and
        "Variable Rs/kWh" (all values are the cells' stripped text). If two
        sections share a heading, the later one replaces the earlier one.

    Raises:
        Exception: If the response status code is not 200. Errors raised by
            ``requests.get`` itself (connection failure, timeout) propagate
            unchanged.
    """
    heading_id = 'table_heading'
    column_names = (
        "Sr. No.",
        "Category",
        "Fixed Rs/Cons/M",
        "Fixed Rs/kW/M",
        "Variable Rs/kWh",
    )

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(
            f"Failed to fetch data from {url}, status code: {response.status_code}"
        )

    soup = BeautifulSoup(response.content, 'html.parser')
    tariff_data = {}
    for section in soup.find_all('tr', id=heading_id):
        heading_cell = section.find('td')
        if heading_cell is None:
            # Heading row without a <td> (e.g. built from <th>): use the
            # row's own text instead of failing on None.
            heading_cell = section
        heading = heading_cell.get_text(strip=True)

        data = []
        for row in section.find_next_siblings('tr'):
            # find_next_siblings returns every later row, including those of
            # the following sections; stop at the next heading so each
            # section only keeps its own rows.
            if row.get('id') == heading_id:
                break
            columns = row.find_all('td')
            if len(columns) < len(column_names):
                continue  # not a data row (too few cells)
            # zip stops after the named columns, so extra cells are ignored.
            data.append({
                name: cell.get_text(strip=True)
                for name, cell in zip(column_names, columns)
            })
        tariff_data[heading] = pd.DataFrame(data)
    return tariff_data
def save_tariff_data(tariff_data, file_path):
    """Write each tariff DataFrame to its own sheet of one Excel workbook.

    Sheet names are derived from the headings. Excel limits sheet names to
    31 characters, forbids the characters ``[ ] : * ? / \\`` and does not
    allow a name to start or end with an apostrophe, so headings are
    sanitised before use. Names that collide after sanitising/truncating
    (compared case-insensitively) get a numeric suffix such as ``_2`` so no
    section's data is written into another section's sheet. Headings that
    are already valid and unique are used unchanged (truncated to 31 chars).

    Args:
        tariff_data: Mapping of heading to ``pandas.DataFrame``.
        file_path: Destination path handed to ``pandas.ExcelWriter``.
    """
    max_len = 31
    # Map every character Excel rejects in a sheet name to an underscore.
    forbidden = str.maketrans({ch: "_" for ch in "[]:*?/\\"})
    used_names = set()

    with pd.ExcelWriter(file_path) as writer:
        for heading, df in tariff_data.items():
            base = str(heading).translate(forbidden)[:max_len].strip("'")
            if not base:
                base = "Sheet"  # an empty sheet name is not allowed

            sheet_name = base
            counter = 2
            while sheet_name.casefold() in used_names:
                # Trim the base so the suffixed name still fits in max_len.
                suffix = f"_{counter}"
                sheet_name = base[:max_len - len(suffix)] + suffix
                counter += 1
            used_names.add(sheet_name.casefold())

            df.to_excel(writer, sheet_name=sheet_name, index=False)
if __name__ == "__main__":
    # Script entry point: scrape the IESCO tariff guide page and store the
    # parsed sections in an Excel workbook in the current directory.
    source_url = "https://iesco.com.pk/index.php/customer-services/tariff-guide"
    output_file = "tariff_data.xlsx"
    try:
        save_tariff_data(fetch_tariff_data(source_url), output_file)
        print("Tariff data successfully saved to tariff_data.xlsx")
    except Exception as error:
        # Top-level boundary: report any failure instead of a traceback.
        print(f"Error: {error}")