Spaces:

Engineer786
/

Demoforhackathon

Build error

App Files Files Community

Demoforhackathon / tariff_scraper.py

Engineer786

Update tariff_scraper.py

096c143 verified over 1 year ago

raw

history blame

2.38 kB

	import requests
	from bs4 import BeautifulSoup
	import pandas as pd

	# URLs of the tariff pages to scrape, keyed by provider name.
	# Only the IESCO tariff guide is defined here.
	TARIFF_URLS = {
	"IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
	}

	def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv", timeout=30):
	    """
	    Scrape the first HTML table found at ``url`` and save it to a CSV file.

	    The first non-empty table row is used as the CSV header. Shorter rows
	    are right-padded with empty strings so every row has the same width.

	    Errors are reported through the return value rather than raised, so
	    callers must compare the result against ``output_file`` to detect
	    success.

	    Args:
	        url (str): The URL of the tariff page to scrape.
	        output_file (str): The name of the CSV file to save data.
	        timeout (float | tuple): Seconds to wait for the server, passed
	            straight to ``requests.get``. Defaults to 30.

	    Returns:
	        str: ``output_file`` if the CSV was written, otherwise a
	        human-readable error message.
	    """
	    try:
	        # requests applies no timeout by default, so without one a stalled
	        # server would block this call indefinitely.
	        response = requests.get(
	            url,
	            headers={"User-Agent": "Mozilla/5.0"},
	            timeout=timeout,
	        )
	        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

	        soup = BeautifulSoup(response.text, 'html.parser')

	        # Only the first <table> on the page is considered.
	        tariff_table = soup.find('table')
	        if tariff_table is None:
	            return "No table found on the webpage."

	        # Collect the text of every header/data cell, row by row. Rows
	        # without any cells carry no data and would otherwise end up as
	        # blank CSV lines (or as a blank header), so they are skipped.
	        rows = []
	        for table_row in tariff_table.find_all('tr'):
	            cells = [
	                cell.get_text(strip=True)
	                for cell in table_row.find_all(['th', 'td'])
	            ]
	            if cells:
	                rows.append(cells)

	        if not rows:
	            return "No table rows found on the webpage."

	        # Pad short rows so the DataFrame gets a rectangular grid.
	        width = max(len(cells) for cells in rows)
	        normalized = [cells + [''] * (width - len(cells)) for cells in rows]

	        # First row supplies the column names; the rest is the data.
	        header, *body = normalized
	        df = pd.DataFrame(body, columns=header)
	        df.to_csv(output_file, index=False)

	        return output_file
	    except requests.exceptions.RequestException as e:
	        # Connection failures, timeouts and HTTP error statuses
	        return f"Request error: {e}"
	    except Exception as e:
	        # Anything else (parsing, DataFrame construction, file writing)
	        return f"An unexpected error occurred: {e}"

	if __name__ == "__main__":
	    # Manual smoke test: scrape the IESCO tariff page into a local CSV file.
	    url = TARIFF_URLS["IESCO"]
	    output_file = "iesco_tariff_data.csv"
	    result = scrape_tariff_data_to_csv(url, output_file)
	    # The scraper returns the output path on success and an error message
	    # otherwise. Compare against the path itself: a suffix check would
	    # misreport any error message that happens to end in ".csv" (HTTP
	    # error messages end with the requested URL).
	    if result == output_file:
	        print(f"Data successfully saved to {output_file}")
	    else:
	        print(f"Error: {result}")