Spaces:
Build error
Build error
| import requests | |
| from bs4 import BeautifulSoup | |
| import pandas as pd | |
# Tariff-guide page URLs keyed by distribution company (currently only IESCO).
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
}
def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv"):
    """
    Scrape the first HTML table from the given URL and save it to a CSV file.

    Args:
        url (str): The URL of the tariff page to scrape.
        output_file (str): The name of the CSV file to save data.

    Returns:
        str: The CSV filename on success, or a human-readable error message.
             Callers distinguish the two by checking for a ".csv" suffix.
    """
    try:
        # Browser-like User-Agent (some sites reject the default one) and a
        # timeout so a stalled server cannot hang the scraper indefinitely.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=30,
        )
        response.raise_for_status()  # Raise for 4xx/5xx responses

        soup = BeautifulSoup(response.text, 'html.parser')

        # Only the first table on the page is scraped.
        tariff_table = soup.find('table')
        if not tariff_table:
            return "No table found on the webpage."

        # Collect cell text row by row, tracking the widest row so ragged
        # rows can be padded to a rectangular shape for pandas.
        data = []
        max_columns = 0
        for row in tariff_table.find_all('tr'):
            cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
            max_columns = max(max_columns, len(cols))
            data.append(cols)

        if not data:
            # A <table> with no <tr> rows would otherwise raise IndexError
            # below; report it as a clear, expected condition instead.
            return "No rows found in the table."

        # Pad every row with empty strings up to the common column count.
        normalized_data = [row + [''] * (max_columns - len(row)) for row in data]

        # The first row is treated as the header row.
        df = pd.DataFrame(normalized_data[1:], columns=normalized_data[0])
        df.to_csv(output_file, index=False)
        return output_file
    except requests.exceptions.RequestException as e:
        # Connection problems, timeouts, and HTTP error statuses end up here.
        return f"Request error: {e}"
    except Exception as e:
        # Anything unexpected (parsing, pandas, file I/O) is reported as a
        # string rather than raised, matching the function's error contract.
        return f"An unexpected error occurred: {e}"
if __name__ == "__main__":
    # One-off smoke test: scrape the IESCO tariff page and report the outcome.
    target_url = TARIFF_URLS["IESCO"]
    destination = "iesco_tariff_data.csv"
    outcome = scrape_tariff_data_to_csv(target_url, destination)
    message = (
        f"Data successfully saved to {destination}"
        if outcome.endswith(".csv")
        else f"Error: {outcome}"
    )
    print(message)