"""Spot-check the scraped satellite database against the live source site.

Compares row counts in the local SQLite DB (``countries``, ``categories``,
``satellites`` tables) with counts freshly scraped from
https://space.skyrocket.de. Mismatches are logged as warnings; network or
parse failures are logged as errors and never abort the remaining checks.
"""

import random
import sqlite3
import sys

import requests
from bs4 import BeautifulSoup
from loguru import logger

# Setup logging: single stderr sink at INFO with a compact format.
logger.remove()
logger.add(sys.stderr, format="{level: <8} | {message}", level="INFO")

DB_PATH = "data/satellites.db"
BASE_URL = "https://space.skyrocket.de/directories/"
# Browser-like UA: the site may reject the default python-requests agent.
HEADERS = {"User-Agent": "Mozilla/5.0"}


def _fetch_soup(url):
    """GET *url* and return its parsed lxml soup.

    Raises ``requests.HTTPError`` on non-2xx responses (previously only
    ``verify_countries`` checked the status; the other verifiers would
    silently parse error pages) and ``requests.Timeout`` after 30s.
    """
    resp = requests.get(url, headers=HEADERS, timeout=30)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "lxml")


def verify_countries(conn):
    """Compare the site's country index length against the ``countries`` table.

    Args:
        conn: open ``sqlite3.Connection`` to the scraped database.
    """
    logger.info("VERIFYING COUNTRIES...")
    url = "https://space.skyrocket.de/directories/sat_c.htm"
    try:
        soup = _fetch_soup(url)
        # Count on page
        page_count = len(soup.select("ul.country-list li"))
        # Count in DB
        cursor = conn.cursor()
        db_count = cursor.execute("SELECT COUNT(*) FROM countries").fetchone()[0]
        if page_count == db_count:
            logger.info(f"✅ Countries match! Page: {page_count}, DB: {db_count}")
        else:
            logger.warning(f"❌ Countries mismatch! Page: {page_count}, DB: {db_count}")
    except Exception as e:
        # Best-effort check: log and continue with the other verifiers.
        logger.error(f"Error checking countries: {e}")


def verify_categories(conn, country="China"):
    """Compare category links on a country page against the ``categories`` table.

    On a mismatch, also lists category names present on the page but absent
    from the DB.

    Args:
        conn: open ``sqlite3.Connection``.
        country: country name as stored in the ``countries`` table.
    """
    logger.info(f"VERIFYING CATEGORIES FOR {country}...")
    # Get country URL from DB
    cursor = conn.cursor()
    row = cursor.execute(
        "SELECT url FROM countries WHERE country_name=?", (country,)
    ).fetchone()
    if not row:
        logger.error(f"Country {country} not found in DB")
        return
    url = row[0]
    try:
        soup = _fetch_soup(url)
        table = soup.find("table", class_="index")
        if not table:
            logger.warning("No category table found on page")
            return
        # Count links inside the table
        page_links = table.select("ul li a")
        page_count = len(page_links)
        # Count in DB
        db_count = cursor.execute(
            "SELECT COUNT(*) FROM categories WHERE country_name=?", (country,)
        ).fetchone()[0]
        if page_count == db_count:
            logger.info(
                f"✅ Categories match for {country}! Page: {page_count}, DB: {db_count}"
            )
        else:
            logger.warning(
                f"❌ Categories mismatch for {country}! Page: {page_count}, DB: {db_count}"
            )
            # Optional: Find which ones are missing
            db_cats = [
                r[0]
                for r in cursor.execute(
                    "SELECT category_name FROM categories WHERE country_name=?",
                    (country,),
                ).fetchall()
            ]
            page_cats = [link.text.strip() for link in page_links]
            missing = set(page_cats) - set(db_cats)
            if missing:
                logger.warning(f"Missing in DB: {missing}")
    except Exception as e:
        logger.error(f"Error checking categories: {e}")


def verify_satellites(conn, country="China", sample_size=3):
    """Heuristically compare satellite counts for a random sample of categories.

    For each sampled category page, counts anchors pointing at
    ``doc_sdat`` satellite pages (skipping ones whose parent is marked
    ``cancelled``) and compares against the ``satellites`` table.

    Args:
        conn: open ``sqlite3.Connection``.
        country: country name as stored in the DB.
        sample_size: max number of categories to spot-check.
    """
    logger.info(f"VERIFYING SATELLITES FOR {country}...")
    cursor = conn.cursor()
    categories = cursor.execute(
        "SELECT category_name, url FROM categories WHERE country_name=?", (country,)
    ).fetchall()
    # Sample without replacement; min() guards against small/empty category lists.
    sampled_categories = random.sample(categories, min(sample_size, len(categories)))
    for cat_name, url in sampled_categories:
        logger.info(f"  Checking Category: {cat_name}")
        try:
            soup = _fetch_soup(url)
            table = soup.find("table", class_="index")
            if not table:
                logger.warning("  No satellite table found query skipping")
                continue
            # Heuristic page count: column layout varies between pages (the
            # satellite column may be index 2 or 3), so instead of walking
            # rows we count every link in the table that targets a satellite
            # detail page (href contains "doc_sdat"), excluding entries whose
            # parent element is flagged "cancelled" — mirrors what the
            # scraper inserts. (The original code also ran a per-row loop
            # whose result was never used; removed as dead work.)
            anchors = table.find_all("a")
            valid_links = [
                a
                for a in anchors
                if "doc_sdat" in a.get("href", "")
                and "cancelled" not in a.parent.get("class", [])
            ]
            page_sat_count = len(valid_links)
            # DB Count
            db_count = cursor.execute(
                "SELECT COUNT(*) FROM satellites WHERE country_name=? AND category_name=?",
                (country, cat_name),
            ).fetchone()[0]
            if page_sat_count == db_count:
                logger.info(f"    ✅ Match! Page: {page_sat_count}, DB: {db_count}")
            else:
                # Note: heuristic might be imperfect if the scraper ignores
                # some specific links or the table structure is unusual.
                logger.warning(
                    f"    ⚠️ Mismatch. Page (heuristic): {page_sat_count}, DB: {db_count}"
                )
        except Exception as e:
            logger.error(f"    Error: {e}")


if __name__ == "__main__":
    conn = sqlite3.connect(DB_PATH)
    try:
        verify_countries(conn)
        verify_categories(conn)
        verify_satellites(conn)
    finally:
        # Ensure the DB handle is released even if a verifier raises.
        conn.close()