Spaces:
Build error
Build error
Update tariff_scraper.py
Browse files - tariff_scraper.py +8 -3
tariff_scraper.py
CHANGED
|
@@ -7,7 +7,7 @@ TARIFF_URLS = {
|
|
| 7 |
"IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
|
| 8 |
}
|
| 9 |
|
| 10 |
-
def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
|
| 11 |
"""
|
| 12 |
Scrape tariff data from the given URL and save it to a CSV file.
|
| 13 |
|
|
@@ -31,15 +31,20 @@ def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
|
|
| 31 |
if not tariff_table:
|
| 32 |
return "No table found on the webpage."
|
| 33 |
|
| 34 |
-
# Extract
|
| 35 |
data = []
|
|
|
|
| 36 |
table_rows = tariff_table.find_all('tr')
|
| 37 |
for row in table_rows:
|
| 38 |
cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
|
|
|
|
| 39 |
data.append(cols)
|
| 40 |
|
|
|
|
|
|
|
|
|
|
| 41 |
# Save the data to a CSV file
|
| 42 |
-
df = pd.DataFrame(
|
| 43 |
df.to_csv(output_file, index=False)
|
| 44 |
|
| 45 |
return output_file
|
|
|
|
| 7 |
"IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
|
| 8 |
}
|
| 9 |
|
| 10 |
+
def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv"):
|
| 11 |
"""
|
| 12 |
Scrape tariff data from the given URL and save it to a CSV file.
|
| 13 |
|
|
|
|
| 31 |
if not tariff_table:
|
| 32 |
return "No table found on the webpage."
|
| 33 |
|
| 34 |
+
# Extract rows and normalize column counts
|
| 35 |
data = []
|
| 36 |
+
max_columns = 0
|
| 37 |
table_rows = tariff_table.find_all('tr')
|
| 38 |
for row in table_rows:
|
| 39 |
cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
|
| 40 |
+
max_columns = max(max_columns, len(cols))
|
| 41 |
data.append(cols)
|
| 42 |
|
| 43 |
+
# Normalize rows to have the same number of columns
|
| 44 |
+
normalized_data = [row + [''] * (max_columns - len(row)) for row in data]
|
| 45 |
+
|
| 46 |
# Save the data to a CSV file
|
| 47 |
+
df = pd.DataFrame(normalized_data[1:], columns=normalized_data[0]) # Use the first row as headers
|
| 48 |
df.to_csv(output_file, index=False)
|
| 49 |
|
| 50 |
return output_file
|