Spaces:
Build error
Build error
Update tariff_scraper.py
Browse files- tariff_scraper.py +19 -38
tariff_scraper.py
CHANGED
|
@@ -4,12 +4,12 @@ import pandas as pd
|
|
| 4 |
|
| 5 |
# Define the URL for PESCO tariff rates
|
| 6 |
TARIFF_URLS = {
|
| 7 |
-
"
|
| 8 |
}
|
| 9 |
|
| 10 |
-
def
|
| 11 |
"""
|
| 12 |
-
Scrape tariff data from
|
| 13 |
|
| 14 |
Args:
|
| 15 |
url (str): The URL of the tariff page to scrape.
|
|
@@ -26,40 +26,21 @@ def scrape_multiple_sections_to_csv(url, output_file="pesco_tariff_data.csv"):
|
|
| 26 |
# Parse the webpage content using BeautifulSoup
|
| 27 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
if not
|
| 32 |
-
return "No
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
table_rows = table.find_all('tr')
|
| 41 |
-
for row in table_rows:
|
| 42 |
-
cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
|
| 43 |
-
data.append(cols)
|
| 44 |
-
|
| 45 |
-
# Create a dataframe for the current table
|
| 46 |
-
if len(data) > 1:
|
| 47 |
-
df = pd.DataFrame(data[1:], columns=data[0])
|
| 48 |
-
else:
|
| 49 |
-
df = pd.DataFrame(data)
|
| 50 |
-
|
| 51 |
-
# Deduplicate column names if necessary
|
| 52 |
-
df.columns = pd.io.parsers._deduplicate(df.columns)
|
| 53 |
-
|
| 54 |
-
# Add a section identifier
|
| 55 |
-
df["Section"] = f"Section {i}"
|
| 56 |
-
all_data.append(df)
|
| 57 |
-
|
| 58 |
-
# Combine all dataframes into one
|
| 59 |
-
combined_data = pd.concat(all_data, ignore_index=True)
|
| 60 |
-
|
| 61 |
-
# Save the combined data to a CSV file
|
| 62 |
-
combined_data.to_csv(output_file, index=False)
|
| 63 |
|
| 64 |
return output_file
|
| 65 |
except requests.exceptions.RequestException as e:
|
|
@@ -71,9 +52,9 @@ def scrape_multiple_sections_to_csv(url, output_file="pesco_tariff_data.csv"):
|
|
| 71 |
|
| 72 |
if __name__ == "__main__":
|
| 73 |
# Test the scraper and save data to a CSV file
|
| 74 |
-
url = TARIFF_URLS["
|
| 75 |
-
output_file = "
|
| 76 |
-
result =
|
| 77 |
if result.endswith(".csv"):
|
| 78 |
print(f"Data successfully saved to {output_file}")
|
| 79 |
else:
|
|
|
|
| 4 |
|
| 5 |
# Define the URL for PESCO tariff rates
|
| 6 |
# Map of distribution-company (DISCO) name -> URL of its public tariff page.
# Only IESCO is configured here; the scraper below pulls the first HTML
# <table> from this page. NOTE(review): file/default names still say
# "pesco_" elsewhere — presumably a leftover from an earlier PESCO target;
# verify which DISCO is actually intended.
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide"
}
|
| 9 |
|
| 10 |
+
def scrape_tariff_data_to_csv(url, output_file="pesco_tariff_data.csv"):
|
| 11 |
"""
|
| 12 |
+
Scrape tariff data from the given URL and save it to a CSV file.
|
| 13 |
|
| 14 |
Args:
|
| 15 |
url (str): The URL of the tariff page to scrape.
|
|
|
|
| 26 |
# Parse the webpage content using BeautifulSoup
|
| 27 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 28 |
|
| 29 |
+
# Extract table rows
|
| 30 |
+
tariff_table = soup.find('table')
|
| 31 |
+
if not tariff_table:
|
| 32 |
+
return "No table found on the webpage."
|
| 33 |
|
| 34 |
+
# Extract data and convert it into a structured format
|
| 35 |
+
data = []
|
| 36 |
+
table_rows = tariff_table.find_all('tr')
|
| 37 |
+
for row in table_rows:
|
| 38 |
+
cols = [col.get_text(strip=True) for col in row.find_all(['th', 'td'])]
|
| 39 |
+
data.append(cols)
|
| 40 |
|
| 41 |
+
# Save the data to a CSV file
|
| 42 |
+
df = pd.DataFrame(data[1:], columns=data[0]) # Use the first row as headers
|
| 43 |
+
df.to_csv(output_file, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
return output_file
|
| 46 |
except requests.exceptions.RequestException as e:
|
|
|
|
| 52 |
|
| 53 |
if __name__ == "__main__":
|
| 54 |
# Test the scraper and save data to a CSV file
|
| 55 |
+
url = TARIFF_URLS["IESCO"]
|
| 56 |
+
output_file = "iesco_tariff_data.csv"
|
| 57 |
+
result = scrape_tariff_data_to_csv(url, output_file)
|
| 58 |
if result.endswith(".csv"):
|
| 59 |
print(f"Data successfully saved to {output_file}")
|
| 60 |
else:
|