| | import sqlite3 |
| | import requests |
| | from bs4 import BeautifulSoup |
| | from loguru import logger |
| | import sys |
| |
|
| | |
# Drop loguru's default handler and install a single stderr sink that prints
# only the padded level name and the message, at INFO level and above.
logger.remove()
logger.add(
    sys.stderr,
    level="INFO",
    format="<level>{level: <8}</level> | <cyan>{message}</cyan>",
)

# Path of the SQLite database opened by the script entry point.
DB_PATH = "data/satellites.db"
# Common prefix of the directory pages on the scraped site.
BASE_URL = "https://space.skyrocket.de/directories/"
| |
|
def verify_countries(conn: sqlite3.Connection, *, timeout: float = 30.0) -> None:
    """Compare the number of countries on the site with the database.

    Fetches ``sat_c.htm`` under ``BASE_URL``, counts the elements matched by
    the CSS selector ``ul.country-list li`` and compares that number with
    ``SELECT COUNT(*) FROM countries``. The outcome is only logged; nothing is
    returned, and any exception raised while fetching, parsing or querying is
    logged instead of propagated.

    Args:
        conn: Open SQLite connection to a database with a ``countries`` table.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    logger.info("VERIFYING COUNTRIES...")
    # Resolves to https://space.skyrocket.de/directories/sat_c.htm.
    url = BASE_URL + "sat_c.htm"
    try:
        # An explicit timeout keeps a stalled server from hanging the check.
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "lxml")

        page_count = len(soup.select("ul.country-list li"))

        cursor = conn.cursor()
        db_count = cursor.execute("SELECT COUNT(*) FROM countries").fetchone()[0]

        if page_count == db_count:
            logger.info(f"✅ Countries match! Page: {page_count}, DB: {db_count}")
        else:
            logger.warning(f"❌ Countries mismatch! Page: {page_count}, DB: {db_count}")

    except Exception as e:
        # Best-effort check: report the failure and let the caller continue.
        logger.error(f"Error checking countries: {e}")
| |
|
def verify_categories(
    conn: sqlite3.Connection,
    country: str = "China",
    *,
    timeout: float = 30.0,
) -> None:
    """Compare one country's category links on the site with the database.

    Looks up the country's page URL in the ``countries`` table, fetches it,
    and counts the ``ul li a`` links inside the first ``table.index`` element.
    That count is compared with the number of ``categories`` rows stored for
    the country. On a mismatch, link texts that have no matching
    ``category_name`` in the database are logged as well.

    Results are only logged. Exceptions raised while fetching, parsing or
    counting are logged rather than propagated; the initial URL lookup runs
    outside that guard.

    Args:
        conn: Open SQLite connection with ``countries`` and ``categories`` tables.
        country: Value matched against the ``country_name`` column.
        timeout: Seconds to wait for the HTTP request before giving up.
    """
    logger.info(f"VERIFYING CATEGORIES FOR {country}...")

    cursor = conn.cursor()
    row = cursor.execute(
        "SELECT url FROM countries WHERE country_name=?", (country,)
    ).fetchone()
    if not row:
        logger.error(f"Country {country} not found in DB")
        return

    url = row[0]

    try:
        # An explicit timeout keeps a stalled server from hanging the check.
        resp = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        # Fail on HTTP errors so an error page is not misreported as a page
        # without a category table.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "lxml")

        table = soup.find("table", class_="index")
        if not table:
            logger.warning("No category table found on page")
            return

        page_links = table.select("ul li a")
        page_count = len(page_links)

        db_count = cursor.execute(
            "SELECT COUNT(*) FROM categories WHERE country_name=?", (country,)
        ).fetchone()[0]

        if page_count == db_count:
            logger.info(f"✅ Categories match for {country}! Page: {page_count}, DB: {db_count}")
        else:
            logger.warning(f"❌ Categories mismatch for {country}! Page: {page_count}, DB: {db_count}")

            # Name the categories present on the page but absent from the DB.
            db_cats = [
                r[0]
                for r in cursor.execute(
                    "SELECT category_name FROM categories WHERE country_name=?",
                    (country,),
                ).fetchall()
            ]
            page_cats = [link.text.strip() for link in page_links]
            missing = set(page_cats) - set(db_cats)
            if missing:
                logger.warning(f"Missing in DB: {missing}")

    except Exception as e:
        # Best-effort check: report the failure and let the caller continue.
        logger.error(f"Error checking categories: {e}")
| |
|
def verify_satellites(
    conn: sqlite3.Connection,
    country: str = "China",
    sample_size: int = 3,
    *,
    timeout: float = 30.0,
) -> None:
    """Spot-check satellite counts for a random sample of a country's categories.

    Picks up to ``sample_size`` rows at random from the ``categories`` table
    for ``country``. For each one it fetches the category URL and counts the
    links inside the first ``table.index`` element whose ``href`` contains
    ``doc_sdat`` and whose parent element does not carry the ``cancelled``
    class. That count is compared with the number of ``satellites`` rows
    stored for the same country and category.

    Results are only logged. A failure in one category is logged and the
    remaining sampled categories are still checked.

    Args:
        conn: Open SQLite connection with ``categories`` and ``satellites`` tables.
        country: Value matched against the ``country_name`` column.
        sample_size: Maximum number of categories to check.
        timeout: Seconds to wait for each HTTP request before giving up.
    """
    import random

    logger.info(f"VERIFYING SATELLITES FOR {country}...")

    cursor = conn.cursor()
    categories = cursor.execute(
        "SELECT category_name, url FROM categories WHERE country_name=?",
        (country,),
    ).fetchall()
    if not categories:
        # Make it visible that nothing was verified instead of returning silently.
        logger.warning(f"No categories found in DB for {country}")
        return

    sampled_categories = random.sample(categories, min(sample_size, len(categories)))

    for cat_name, url in sampled_categories:
        logger.info(f" Checking Category: {cat_name}")
        try:
            # An explicit timeout keeps a stalled server from hanging the check.
            resp = requests.get(
                url,
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=timeout,
            )
            # Fail on HTTP errors so an error page is not misreported as a
            # page without a satellite table.
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "lxml")

            table = soup.find("table", class_="index")
            if not table:
                logger.warning(" No satellite table found query skipping")
                continue

            # Heuristic page count: detail-page links not marked as cancelled.
            valid_links = [
                link
                for link in table.find_all("a")
                if "doc_sdat" in link.get("href", "")
                and "cancelled" not in link.parent.get("class", [])
            ]
            page_sat_count = len(valid_links)

            db_count = cursor.execute(
                "SELECT COUNT(*) FROM satellites WHERE country_name=? AND category_name=?",
                (country, cat_name),
            ).fetchone()[0]

            if page_sat_count == db_count:
                logger.info(f" ✅ Match! Page: {page_sat_count}, DB: {db_count}")
            else:
                logger.warning(f" ⚠️ Mismatch. Page (heuristic): {page_sat_count}, DB: {db_count}")

        except Exception as e:
            # Best-effort check: report the failure and move on to the next category.
            logger.error(f" Error: {e}")
| |
|
if __name__ == "__main__":
    # Run the three checks in order against the database at DB_PATH.
    conn = sqlite3.connect(DB_PATH)
    try:
        verify_countries(conn)
        verify_categories(conn)
        verify_satellites(conn)
    finally:
        # Close the connection even when a check raises; some of the DB
        # lookups in the checks run outside their try/except blocks.
        conn.close()
| |
|