Spaces:
Sleeping
Sleeping
Create scraper.py
Browse files- scraper.py +36 -0
scraper.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scraper.py
|
| 2 |
+
import urllib3
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
|
| 5 |
+
def fetch_tariff_from_url(url, load_type):
|
| 6 |
+
"""Scrape the website to fetch tariff based on load type."""
|
| 7 |
+
try:
|
| 8 |
+
http = urllib3.PoolManager()
|
| 9 |
+
response = http.request("GET", url)
|
| 10 |
+
if response.status == 200:
|
| 11 |
+
soup = BeautifulSoup(response.data, 'html.parser')
|
| 12 |
+
tariff_text = soup.get_text()
|
| 13 |
+
|
| 14 |
+
load_type_mapping = {
|
| 15 |
+
"Residential": "A-1 GENERAL SUPPLY TARIFF RESIDENTIAL",
|
| 16 |
+
"Commercial": "A-2 GENERAL SUPPLY TARIFF COMMERCIAL",
|
| 17 |
+
"Industrial": "B - INDUSTRIAL SUPPLY TARIFFS",
|
| 18 |
+
"Agriculture": "D - AGRICULTURE TARIFF"
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
load_type_label = load_type_mapping.get(load_type, None)
|
| 22 |
+
if load_type_label:
|
| 23 |
+
start_idx = tariff_text.find(load_type_label)
|
| 24 |
+
if start_idx != -1:
|
| 25 |
+
tariff_section = tariff_text[start_idx:start_idx+1000]
|
| 26 |
+
rates = [float(word) for word in tariff_section.split() if word.replace(".", "").isdigit()]
|
| 27 |
+
if rates:
|
| 28 |
+
return rates[0]
|
| 29 |
+
print(f"No tariff data found for {load_type_label}.")
|
| 30 |
+
else:
|
| 31 |
+
print("Invalid load type selected.")
|
| 32 |
+
else:
|
| 33 |
+
print(f"Failed to fetch data. HTTP Status: {response.status}")
|
| 34 |
+
except Exception as e:
|
| 35 |
+
print(f"Error scraping website: {e}")
|
| 36 |
+
return None
|