Spaces:

Engineer786
/

Demoforhackathon

Build error

Engineer786 committed on Jan 5, 2025

Commit

096c143

verified ·

1 Parent(s): bdc1d95

Update tariff_scraper.py

Files changed (1) hide show

tariff_scraper.py CHANGED Viewed

@@ -7,7 +7,7 @@ TARIFF_URLS = {
     "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
 }
-def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
     """
     Scrape tariff data from the given URL and save it to a CSV file.
@@ -31,15 +31,20 @@ def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
         if not tariff_table:
             return "No table found on the webpage."
-        # Extract data and convert it into a structured format
         data = []
         table_rows = tariff_table.find_all('tr')
         for row in table_rows:
             cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
             data.append(cols)
         # Save the data to a CSV file
-        df = pd.DataFrame(data[1:], columns=data[0])  # Use the first row as headers
         df.to_csv(output_file, index=False)
         return output_file

     "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
 }
+def scrape_tariff_data_to_csv(url, output_file="tariff_data.csv"):
     """
     Scrape tariff data from the given URL and save it to a CSV file.
         if not tariff_table:
             return "No table found on the webpage."
+        # Extract rows and normalize column counts
         data = []
+        max_columns = 0
         table_rows = tariff_table.find_all('tr')
         for row in table_rows:
             cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
+            max_columns = max(max_columns, len(cols))
             data.append(cols)
+        # Normalize rows to have the same number of columns
+        normalized_data = [row + [''] * (max_columns - len(row)) for row in data]
         # Save the data to a CSV file
+        df = pd.DataFrame(normalized_data[1:], columns=normalized_data[0])  # Use the first row as headers
         df.to_csv(output_file, index=False)
         return output_file