# Interactive scraper: lets the user pick an electricity company, downloads
# that company's tariff page and prints a preview of the table rows found on it.
import requests
from bs4 import BeautifulSoup

# Tariff page URL for each electricity company, keyed by the company's
# abbreviation.
#
# The insertion order of this dict is significant: the selection menu at the
# bottom of this file numbers the companies in this order, so reordering or
# inserting entries renumbers the menu.
#
# NOTE(review): the HESCO and QESCO entries use plain http:// while the others
# use https:// — confirm whether those sites offer HTTPS.
# NOTE(review): the PESCO entry points at pesconlinebill.pk, which does not
# look like the company's own domain — verify that this is the intended source.
TARIFF_URLS = {
    "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
    "FESCO": "https://fesco.com.pk/tariff",
    "HESCO": "http://www.hesco.gov.pk/htmls/tariffs.htm",
    "KE": "https://www.ke.com.pk/customer-services/tariff-structure/",
    "LESCO": "https://www.lesco.gov.pk/ElectricityTariffs",
    "PESCO": "https://pesconlinebill.pk/pesco-tariff/",
    "QESCO": "http://qesco.com.pk/Tariffs.aspx",
    "TESCO": "https://tesco.gov.pk/index.php/electricity-tariff"
}

def scrape_tariff_data(url, timeout=10):
    """
    Scrape tariff data from the given URL.

    Downloads the page, then flattens every row of every <table> into one
    string whose cell texts are joined with ' | '. Problems are reported
    in-band: instead of raising, the function returns a single-item list
    holding a human-readable status or error message.

    Args:
        url (str): The URL of the tariff page to scrape.
        timeout (float or tuple): Seconds to wait for the server before giving
            up, passed straight to requests.get. Defaults to 10.

    Returns:
        list: A list of strings representing the rows of tariff data, or a
            one-element list with a message when the request failed, the page
            has no tables, or the tables contain no text.
    """
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        # A browser-like User-Agent is sent, presumably because some of these
        # sites reject the default client string.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception

        # Parse the webpage content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # find_all is recursive, so this also returns tables nested in others
        tariff_sections = soup.find_all('table')
        if not tariff_sections:
            return ["No tables found on the webpage."]

        data = []
        for section in tariff_sections:
            for row in section.find_all('tr'):
                # Nested tables are visited on their own turn; handling their
                # rows here as well would emit every nested row twice.
                if row.find_parent('table') is not section:
                    continue

                # Keep only the cells that belong to this row, not the cells
                # of a table nested inside one of them.
                cells = [
                    col.get_text(strip=True)
                    for col in row.find_all(['th', 'td'])
                    if col.find_parent('tr') is row
                ]

                # Test the cell texts, not the joined string: a row of empty
                # cells would otherwise survive as a bare ' | ' separator.
                if any(cells):
                    data.append(' | '.join(cells))

        return data if data else ["No data found in the tables."]
    except requests.exceptions.RequestException as e:
        # Connection failures, timeouts and HTTP error statuses
        return [f"Request error: {e}"]
    except Exception as e:
        # Deliberate catch-all: callers rely on always getting a list back
        return [f"An unexpected error occurred: {e}"]

def resolve_company_choice(raw_choice, companies):
    """
    Translate the user's menu entry into a company name.

    Args:
        raw_choice (str): Text typed by the user; expected to be a 1-based
            menu number.
        companies (list): Company names in the order they were listed.

    Returns:
        The selected company name, or None when the entry is not an integer
        or lies outside 1..len(companies).
    """
    try:
        position = int(raw_choice)
    except ValueError:
        return None

    # Reject 0 and negative numbers explicitly: used as list indexes they
    # would silently wrap around and pick a company from the end of the list.
    if not 1 <= position <= len(companies):
        return None
    return companies[position - 1]


def main():
    """Show the company menu, read a choice and print a tariff preview."""
    # Menu numbering follows the insertion order of TARIFF_URLS.
    companies = list(TARIFF_URLS)

    print("Available Companies:")
    for idx, company in enumerate(companies, start=1):
        print(f"{idx}. {company}")

    selected_company = resolve_company_choice(
        input("Enter the number corresponding to the company: "),
        companies,
    )
    if selected_company is None:
        print("Invalid selection. Please choose a valid company number.")
        return

    url = TARIFF_URLS[selected_company]
    print(f"\nFetching tariff data for {selected_company} ({url})...\n")

    # Scrape and display the data
    tariff_data = scrape_tariff_data(url)
    print("Tariff Data:")
    for row in tariff_data[:10]:  # Show a preview of the first 10 rows
        print(row)


if __name__ == "__main__":
    main()