| | import os |
| | import requests |
| | from bs4 import BeautifulSoup |
| | import pandas as pd |
| | import time |
| | from random import randint |
| |
|
def _parse_tariff_rows(html):
    """Extract tariff records from an HTML page.

    Scans every ``<tr>`` in *html* and, for rows with at least two
    ``<td>`` cells, builds ``{"category": ..., "rate": ...}`` dicts.
    Rows whose rate cell is not numeric (e.g. header-like rows) are
    skipped silently.

    Args:
        html: Raw response body (bytes or str) to parse.

    Returns:
        list[dict]: Parsed records; empty if nothing matched.
    """
    soup = BeautifulSoup(html, "html.parser")
    records = []
    for row in soup.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            try:
                records.append({
                    "category": cells[0].text.strip(),
                    # Strip thousands separators so "1,234.5" parses.
                    "rate": float(cells[1].text.strip().replace(",", "")),
                })
            except ValueError:
                # Non-numeric rate cell — not a data row; skip it.
                continue
    return records


def scrape_tariffs(urls, max_retries=3):
    """Scrape tariff tables from *urls* and save them to data/tariffs.csv.

    Each URL is fetched with a 10-second timeout; transient request
    failures are retried up to *max_retries* times with a short random
    back-off. All parsed rows are pooled and written as a single CSV.

    Args:
        urls: Iterable of page URLs containing HTML tariff tables.
        max_retries: Extra attempts per URL after the first failure
            (default 3, matching the original behavior).

    Side effects:
        Creates the ``data/`` directory if missing and writes
        ``data/tariffs.csv`` when any rows were parsed; otherwise
        prints a notice. Progress/errors are reported via ``print``.
    """
    data = []
    os.makedirs("data", exist_ok=True)

    for url in urls:
        # One initial attempt plus up to max_retries retries.
        for attempt in range(max_retries + 1):
            if attempt > 0:
                # Random back-off before each retry to avoid hammering.
                time.sleep(randint(1, 3))
            try:
                response = requests.get(url, timeout=10)
                # Raises for any non-2xx status, so no status check needed.
                response.raise_for_status()
                data.extend(_parse_tariff_rows(response.content))
                break
            except requests.exceptions.RequestException as exc:
                # Bug fix: the original retry handler printed `e` without
                # binding it (`except ...:` with no `as`), raising NameError.
                if attempt == 0:
                    print(f"Error fetching data from {url}: {exc}")
                    print("Retrying...")
                else:
                    print(f"Retry failed: {exc}")

        # Polite random delay between URLs.
        time.sleep(randint(2, 5))

    if data:
        df = pd.DataFrame(data)
        df.to_csv("data/tariffs.csv", index=False)
        print("Tariff data saved successfully.")
    else:
        print("No tariff data found.")