import os
import random
import sqlite3

import requests
from bs4 import BeautifulSoup

DB_PATH = "data/satellites.db"
OUTPUT_DIR = "data/debug_pages"

# A missing timeout would let a single stalled server hang the whole run,
# so every request carries an explicit one (seconds).
REQUEST_TIMEOUT = 30
HEADERS = {"User-Agent": "Mozilla/5.0"}


def analyze_random_satellites(count=15):
    """Survey a random sample of China-flagged satellite pages.

    Pulls satellite (name, url) rows from the local SQLite DB, fetches a
    random sample of the pages, and reports whether each page contains the
    three landmarks the scraper relies on: the ``satdata`` spec table, the
    ``satdescription`` div, and the ``satlist`` launch table. Each fetched
    page's raw HTML is also saved under OUTPUT_DIR for manual inspection.

    Args:
        count: Maximum number of satellites to sample (capped at the
            number of rows available).
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Get all China satellites. Close the connection even if the query
    # raises (e.g. table missing) — the original leaked it on error.
    conn = sqlite3.connect(DB_PATH)
    try:
        satellites = conn.execute(
            "SELECT satellite_name, url FROM satellites WHERE country_name='China'"
        ).fetchall()
    finally:
        conn.close()

    if not satellites:
        print("No satellites found in DB.")
        return

    selected = random.sample(satellites, min(count, len(satellites)))
    print(f"Analyzing {len(selected)} random satellites...")

    for name, url in selected:
        print(f"\nFetching: {name} | {url}")
        try:
            resp = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "lxml")

            # 1. Check for specification table (id="satdata")
            satdata = soup.find("table", id="satdata")
            has_satdata = "✅ Found" if satdata else "❌ Missing"

            # 2. Check for description
            desc = soup.find("div", id="satdescription")
            has_desc = "✅ Found" if desc else "❌ Missing"

            # 3. Check for launch list (id="satlist")
            satlist = soup.find("table", id="satlist")
            has_satlist = "✅ Found" if satlist else "❌ Missing"

            print(f" - SatData: {has_satdata}")
            print(f" - Desc: {has_desc}")
            print(f" - Launch: {has_satlist}")

            # Save HTML for deep inspection if needed. Strip characters
            # unsafe in filenames; fall back to a placeholder so an
            # all-symbol name does not produce a hidden ".html" file.
            safe_name = "".join(
                c for c in name if c.isalnum() or c in (" ", "-", "_")
            ).strip() or "unknown"
            out_path = os.path.join(OUTPUT_DIR, f"{safe_name}.html")
            with open(out_path, "w", encoding="utf-8") as f:
                f.write(resp.text)
        except Exception as e:
            # Best-effort survey: log the failure and continue with the
            # next satellite rather than aborting the whole run.
            print(f" ❌ Error: {e}")


if __name__ == "__main__":
    analyze_random_satellites()